diff --git a/mesa-7.10-nouveau-classic-libdrm.patch b/mesa-7.10-nouveau-classic-libdrm.patch new file mode 100644 index 0000000..bb787ef --- /dev/null +++ b/mesa-7.10-nouveau-classic-libdrm.patch @@ -0,0 +1,33 @@ +From c43905b6c43ac34bbcb5a9e44414fa500e5cebeb Mon Sep 17 00:00:00 2001 +From: Ben Skeggs +Date: Mon, 17 Jan 2011 12:54:27 +1000 +Subject: [PATCH 3/3] mesa-7.10-nouveau-classic-libdrm + +Signed-off-by: Ben Skeggs +--- + src/mesa/drivers/dri/nouveau/nouveau_driver.h | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h +index 8036b18..c5ac128 100644 +--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h ++++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h +@@ -38,7 +38,6 @@ + #include + + #include "nouveau_device.h" +-#include "nouveau_pushbuf.h" + #include "nouveau_grobj.h" + #include "nouveau_channel.h" + #include "nouveau_bo.h" +@@ -46,6 +45,7 @@ + #include "nouveau_screen.h" + #include "nouveau_state.h" + #include "nouveau_surface.h" ++#include "nv04_pushbuf.h" + + #define DRIVER_DATE "20091015" + #define DRIVER_AUTHOR "Nouveau" +-- +1.7.3.4 + diff --git a/mesa-7.10-nouveau-revert.patch b/mesa-7.10-nouveau-revert.patch new file mode 100644 index 0000000..d88d565 --- /dev/null +++ b/mesa-7.10-nouveau-revert.patch @@ -0,0 +1,114 @@ +From 20f77c532547c7f8f1cb26f41357037821cf2a9f Mon Sep 17 00:00:00 2001 +From: Ben Skeggs +Date: Mon, 17 Jan 2011 12:47:12 +1000 +Subject: [PATCH 2/3] mesa-7.10-nouveau-revert.patch + +Signed-off-by: Ben Skeggs +--- + src/gallium/drivers/nv50/nv50_context.c | 4 ---- + src/gallium/drivers/nv50/nv50_shader_state.c | 2 +- + src/gallium/drivers/nv50/nv50_state.c | 5 ++--- + src/gallium/drivers/nv50/nv50_vbo.c | 2 +- + src/gallium/drivers/nvc0/nvc0_screen.c | 2 +- + src/gallium/drivers/nvfx/nvfx_vbo.c | 14 +++++++++++--- + 6 files changed, 16 insertions(+), 13 deletions(-) + +diff --git 
a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c +index 4f97616..0874cb5 100644 +--- a/src/gallium/drivers/nv50/nv50_context.c ++++ b/src/gallium/drivers/nv50/nv50_context.c +@@ -49,10 +49,6 @@ nv50_destroy(struct pipe_context *pipe) + struct nv50_context *nv50 = nv50_context(pipe); + int i; + +- for (i = 0; i < nv50->vtxbuf_nr; i++) { +- pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); +- } +- + for (i = 0; i < 64; i++) { + if (!nv50->state.hw[i]) + continue; +diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c +index 1c1b66d..306aa81 100644 +--- a/src/gallium/drivers/nv50/nv50_shader_state.c ++++ b/src/gallium/drivers/nv50/nv50_shader_state.c +@@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50, + map += nr; + } + +- pipe_buffer_unmap(pipe, transfer); ++ pipe_buffer_unmap(pipe, buf, transfer); + } + + static void +diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c +index b4eda0f..04291e4 100644 +--- a/src/gallium/drivers/nv50/nv50_state.c ++++ b/src/gallium/drivers/nv50/nv50_state.c +@@ -779,9 +779,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + { + struct nv50_context *nv50 = nv50_context(pipe); + +- util_copy_vertex_buffers(nv50->vtxbuf, +- &nv50->vtxbuf_nr, +- vb, count); ++ memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); ++ nv50->vtxbuf_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; + } +diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c +index 53f319a..d41a59d 100644 +--- a/src/gallium/drivers/nv50/nv50_vbo.c ++++ b/src/gallium/drivers/nv50/nv50_vbo.c +@@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, + nzi = TRUE; + } + +- pipe_buffer_unmap(pipe, transfer); ++ pipe_buffer_unmap(pipe, indexBuffer, transfer); + } + + static void +diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c +index 
f608b32..e14d4fd 100644 +--- a/src/gallium/drivers/nvc0/nvc0_screen.c ++++ b/src/gallium/drivers/nvc0/nvc0_screen.c +@@ -110,7 +110,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: +- case PIPE_CAP_INSTANCED_DRAWING: ++// case PIPE_CAP_INSTANCED_DRAWING: + return 1; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); +diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c +index 01dacb4..1c88f5f 100644 +--- a/src/gallium/drivers/nvfx/nvfx_vbo.c ++++ b/src/gallium/drivers/nvfx/nvfx_vbo.c +@@ -591,10 +591,18 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + { + struct nvfx_context *nvfx = nvfx_context(pipe); + +- util_copy_vertex_buffers(nvfx->vtxbuf, +- &nvfx->vtxbuf_nr, +- vb, count); ++ for(unsigned i = 0; i < count; ++i) ++ { ++ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); ++ nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; ++ nvfx->vtxbuf[i].max_index = vb[i].max_index; ++ nvfx->vtxbuf[i].stride = vb[i].stride; ++ } ++ ++ for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) ++ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); + ++ nvfx->vtxbuf_nr = count; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; + } +-- +1.7.3.4 + diff --git a/mesa-7.10-nouveau-updates.patch b/mesa-7.10-nouveau-updates.patch new file mode 100644 index 0000000..b92bb1a --- /dev/null +++ b/mesa-7.10-nouveau-updates.patch @@ -0,0 +1,19963 @@ +From 417e136ecef44324035c2c124dd184f14af03c44 Mon Sep 17 00:00:00 2001 +From: Ben Skeggs +Date: Mon, 17 Jan 2011 12:44:46 +1000 +Subject: [PATCH 1/3] mesa-7.10-nouveau-updates + +Signed-off-by: Ben Skeggs +--- + configure.ac | 2 +- + src/gallium/drivers/nouveau/nouveau_screen.h | 3 +- + src/gallium/drivers/nouveau/nouveau_winsys.h | 7 +- + src/gallium/drivers/nouveau/nv_object.xml.h | 57 +- + src/gallium/drivers/nv50/nv50_context.c | 4 
+ + src/gallium/drivers/nv50/nv50_shader_state.c | 2 +- + src/gallium/drivers/nv50/nv50_state.c | 14 +- + src/gallium/drivers/nv50/nv50_surface.c | 2 +- + src/gallium/drivers/nv50/nv50_vbo.c | 2 +- + src/gallium/drivers/nvc0/Makefile | 34 + + src/gallium/drivers/nvc0/SConscript | 36 + + src/gallium/drivers/nvc0/nv50_defs.xml.h | 142 ++ + src/gallium/drivers/nvc0/nv50_texture.xml.h | 259 +++ + src/gallium/drivers/nvc0/nvc0_2d.xml.h | 380 ++++ + src/gallium/drivers/nvc0/nvc0_3d.xml.h | 1183 ++++++++++++ + src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h | 98 + + src/gallium/drivers/nvc0/nvc0_buffer.c | 489 +++++ + src/gallium/drivers/nvc0/nvc0_context.c | 164 ++ + src/gallium/drivers/nvc0/nvc0_context.h | 227 +++ + src/gallium/drivers/nvc0/nvc0_draw.c | 88 + + src/gallium/drivers/nvc0/nvc0_fence.c | 203 ++ + src/gallium/drivers/nvc0/nvc0_fence.h | 48 + + src/gallium/drivers/nvc0/nvc0_formats.c | 462 +++++ + src/gallium/drivers/nvc0/nvc0_graph_macros.h | 235 +++ + src/gallium/drivers/nvc0/nvc0_m2mf.xml.h | 138 ++ + src/gallium/drivers/nvc0/nvc0_miptree.c | 327 ++++ + src/gallium/drivers/nvc0/nvc0_mm.c | 274 +++ + src/gallium/drivers/nvc0/nvc0_pc.c | 693 +++++++ + src/gallium/drivers/nvc0/nvc0_pc.h | 653 +++++++ + src/gallium/drivers/nvc0/nvc0_pc_emit.c | 979 ++++++++++ + src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 1236 ++++++++++++ + src/gallium/drivers/nvc0/nvc0_pc_print.c | 377 ++++ + src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 927 +++++++++ + src/gallium/drivers/nvc0/nvc0_program.c | 694 +++++++ + src/gallium/drivers/nvc0/nvc0_program.h | 89 + + src/gallium/drivers/nvc0/nvc0_push.c | 289 +++ + src/gallium/drivers/nvc0/nvc0_push2.c | 333 ++++ + src/gallium/drivers/nvc0/nvc0_query.c | 337 ++++ + src/gallium/drivers/nvc0/nvc0_resource.c | 71 + + src/gallium/drivers/nvc0/nvc0_resource.h | 201 ++ + src/gallium/drivers/nvc0/nvc0_screen.c | 670 +++++++ + src/gallium/drivers/nvc0/nvc0_screen.h | 192 ++ + src/gallium/drivers/nvc0/nvc0_shader_state.c | 180 ++ + 
src/gallium/drivers/nvc0/nvc0_state.c | 865 +++++++++ + src/gallium/drivers/nvc0/nvc0_state_validate.c | 430 +++++ + src/gallium/drivers/nvc0/nvc0_stateobj.h | 82 + + src/gallium/drivers/nvc0/nvc0_surface.c | 377 ++++ + src/gallium/drivers/nvc0/nvc0_tex.c | 277 +++ + src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2018 ++++++++++++++++++++ + src/gallium/drivers/nvc0/nvc0_transfer.c | 381 ++++ + src/gallium/drivers/nvc0/nvc0_transfer.h | 38 + + src/gallium/drivers/nvc0/nvc0_vbo.c | 671 +++++++ + src/gallium/drivers/nvc0/nvc0_winsys.h | 120 ++ + src/gallium/drivers/nvfx/nv04_2d.c | 2 +- + src/gallium/drivers/nvfx/nv30_fragtex.c | 3 +- + src/gallium/drivers/nvfx/nv40_fragtex.c | 5 +- + src/gallium/drivers/nvfx/nvfx_context.c | 6 +- + src/gallium/drivers/nvfx/nvfx_context.h | 13 +- + src/gallium/drivers/nvfx/nvfx_draw.c | 14 +- + src/gallium/drivers/nvfx/nvfx_fragprog.c | 16 +- + src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 +- + src/gallium/drivers/nvfx/nvfx_push.c | 57 +- + src/gallium/drivers/nvfx/nvfx_query.c | 12 +- + src/gallium/drivers/nvfx/nvfx_screen.c | 70 +- + src/gallium/drivers/nvfx/nvfx_state.c | 2 +- + src/gallium/drivers/nvfx/nvfx_state_emit.c | 68 +- + src/gallium/drivers/nvfx/nvfx_state_fb.c | 39 +- + src/gallium/drivers/nvfx/nvfx_surface.c | 8 +- + src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +- + src/gallium/drivers/nvfx/nvfx_vertprog.c | 14 +- + src/gallium/targets/dri-nouveau/Makefile | 1 + + src/gallium/targets/xorg-nouveau/Makefile | 1 + + .../winsys/nouveau/drm/nouveau_drm_winsys.c | 3 + + src/mesa/drivers/dri/nouveau/nouveau_texture.c | 6 +- + 74 files changed, 18237 insertions(+), 206 deletions(-) + create mode 100644 src/gallium/drivers/nvc0/Makefile + create mode 100644 src/gallium/drivers/nvc0/SConscript + create mode 100644 src/gallium/drivers/nvc0/nv50_defs.xml.h + create mode 100644 src/gallium/drivers/nvc0/nv50_texture.xml.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_2d.xml.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_3d.xml.h + 
create mode 100644 src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_buffer.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_context.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_context.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_draw.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_formats.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_graph_macros.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_m2mf.xml.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_miptree.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_mm.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_emit.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_optimize.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_print.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_program.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_program.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_push.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_push2.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_query.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_shader_state.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_state.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_state_validate.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_stateobj.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_surface.c + create mode 100644 
src/gallium/drivers/nvc0/nvc0_tex.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.h + create mode 100644 src/gallium/drivers/nvc0/nvc0_vbo.c + create mode 100644 src/gallium/drivers/nvc0/nvc0_winsys.h + +diff --git a/configure.ac b/configure.ac +index b451f7c..58fc79f 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1686,7 +1686,7 @@ AC_ARG_ENABLE([gallium-nouveau], + [enable_gallium_nouveau="$enableval"], + [enable_gallium_nouveau=no]) + if test "x$enable_gallium_nouveau" = xyes; then +- GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50" ++ GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0" + gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" + fi + +diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h +index 8c29027..1f4e517 100644 +--- a/src/gallium/drivers/nouveau/nouveau_screen.h ++++ b/src/gallium/drivers/nouveau/nouveau_screen.h +@@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *); + + + +- ++#ifndef NOUVEAU_NVC0 + static INLINE unsigned + RING_3D(unsigned mthd, unsigned size) + { +@@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size) + { + return 0x40000000 | (7 << 13) | (size << 18) | mthd; + } ++#endif + + #endif +diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h +index ab480ca..8dfb84a 100644 +--- a/src/gallium/drivers/nouveau/nouveau_winsys.h ++++ b/src/gallium/drivers/nouveau/nouveau_winsys.h +@@ -10,7 +10,9 @@ + #include "nouveau/nouveau_grobj.h" + #include "nouveau/nouveau_notifier.h" + #include "nouveau/nouveau_resource.h" +-#include "nouveau/nouveau_pushbuf.h" ++#ifndef NOUVEAU_NVC0 ++#include "nouveau/nv04_pushbuf.h" ++#endif + + #ifndef NV04_PFIFO_MAX_PACKET_LEN + #define NV04_PFIFO_MAX_PACKET_LEN 2047 +@@ -41,4 +43,7 @@ 
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); + extern struct pipe_screen * + nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); + ++extern struct pipe_screen * ++nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *); ++ + #endif +diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h +index cb7653c..a5b0d04 100644 +--- a/src/gallium/drivers/nouveau/nv_object.xml.h ++++ b/src/gallium/drivers/nouveau/nv_object.xml.h +@@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng + git clone git://0x04.net/rules-ng-ng + + The rules-ng-ng source files this header was generated from are: +-- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14) +-- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50) +-- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15) +-- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53) +-- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53) +-- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53) ++- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) ++- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) + + Copyright (C) 2006-2010 by the following authors: + - Artur Huillet (ahuillet) +@@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors: + - Mark Carey (careym) + - Matthieu Castet (mat-c) + - nvidiaman (nvidiaman) +-- Patrice Mandin (pmandin, pmdata) ++- Patrice Mandin (pmandin, pmdata) + - Pekka Paalanen (pq, ppaalanen) + - Peter Popov (ironpeter) + - Richard Hughes (hughsient) +@@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define NV50_COMPUTE 0x000050c0 + #define NVA3_COMPUTE 0x000085c0 + #define NVC0_COMPUTE 0x000090c0 ++#define NV84_CRYPT 0x000074c1 + #define NV01_SUBCHAN__SIZE 0x00002000 + #define NV01_SUBCHAN 0x00000000 + +@@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define NV84_SUBCHAN_QUERY_GET 0x0000001c + +-#define NV84_SUBCHAN_UNK20 0x00000020 ++#define NV84_SUBCHAN_QUERY_INTR 0x00000020 + +-#define NV84_SUBCHAN_UNK24 0x00000024 ++#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024 + + #define NV10_SUBCHAN_REF_CNT 0x00000050 + +@@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c + +-#define NV50_SUBCHAN_UNK80 0x00000080 ++#define NV40_SUBCHAN_YIELD 0x00000080 + + #define NV01_GRAPH 0x00000000 + +@@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + #define NV40_GRAPH_PM_TRIGGER 0x00000140 + ++#define NVC0_SUBCHAN__SIZE 0x00008000 ++#define NVC0_SUBCHAN 0x00000000 ++ ++#define NVC0_SUBCHAN_OBJECT 0x00000000 ++ ++ ++#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010 ++ ++#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014 ++ ++#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018 ++ ++#define NVC0_SUBCHAN_QUERY_GET 0x0000001c ++ ++#define NVC0_SUBCHAN_REF_CNT 0x00000050 ++ ++#define NVC0_GRAPH 0x00000000 ++ ++#define NVC0_GRAPH_NOP 0x00000100 ++ ++#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104 ++ ++#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108 ++ ++#define NVC0_GRAPH_NOTIFY 0x0000010c ++#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000 ++#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001 ++ ++#define NVC0_GRAPH_SERIALIZE 0x00000110 ++ ++#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114 ++ ++#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118 ++ ++#define NVC0_GRAPH_MACRO_ID 0x0000011c ++ ++#define NVC0_GRAPH_MACRO_POS 0x00000120 ++ + + #endif /* NV_OBJECT_XML */ +diff --git a/src/gallium/drivers/nv50/nv50_context.c 
b/src/gallium/drivers/nv50/nv50_context.c +index 0874cb5..4f97616 100644 +--- a/src/gallium/drivers/nv50/nv50_context.c ++++ b/src/gallium/drivers/nv50/nv50_context.c +@@ -49,6 +49,10 @@ nv50_destroy(struct pipe_context *pipe) + struct nv50_context *nv50 = nv50_context(pipe); + int i; + ++ for (i = 0; i < nv50->vtxbuf_nr; i++) { ++ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); ++ } ++ + for (i = 0; i < 64; i++) { + if (!nv50->state.hw[i]) + continue; +diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c +index 306aa81..1c1b66d 100644 +--- a/src/gallium/drivers/nv50/nv50_shader_state.c ++++ b/src/gallium/drivers/nv50/nv50_shader_state.c +@@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50, + map += nr; + } + +- pipe_buffer_unmap(pipe, buf, transfer); ++ pipe_buffer_unmap(pipe, transfer); + } + + static void +diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c +index f42fa2d..b4eda0f 100644 +--- a/src/gallium/drivers/nv50/nv50_state.c ++++ b/src/gallium/drivers/nv50/nv50_state.c +@@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct nv50_context *nv50 = nv50_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { +- nv50->constbuf[PIPE_SHADER_VERTEX] = buf; + nv50->dirty |= NV50_NEW_VERTPROG_CB; + } else + if (shader == PIPE_SHADER_FRAGMENT) { +- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; +- } else +- if (shader == PIPE_SHADER_GEOMETRY) { +- nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; ++ } else { ++ assert(shader == PIPE_SHADER_GEOMETRY); + nv50->dirty |= NV50_NEW_GEOMPROG_CB; + } ++ ++ pipe_resource_reference(&nv50->constbuf[shader], buf); + } + + static void +@@ -780,8 +779,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + { + struct nv50_context *nv50 = nv50_context(pipe); + +- memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); +- 
nv50->vtxbuf_nr = count; ++ util_copy_vertex_buffers(nv50->vtxbuf, ++ &nv50->vtxbuf_nr, ++ vb, count); + + nv50->dirty |= NV50_NEW_ARRAYS; + } +diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c +index ce48022..a99df76 100644 +--- a/src/gallium/drivers/nv50/nv50_surface.c ++++ b/src/gallium/drivers/nv50/nv50_surface.c +@@ -22,7 +22,7 @@ + + #define __NOUVEAU_PUSH_H__ + #include +-#include "nouveau/nouveau_pushbuf.h" ++#include "nouveau/nv04_pushbuf.h" + #include "nv50_context.h" + #include "nv50_resource.h" + #include "pipe/p_defines.h" +diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c +index d41a59d..53f319a 100644 +--- a/src/gallium/drivers/nv50/nv50_vbo.c ++++ b/src/gallium/drivers/nv50/nv50_vbo.c +@@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, + nzi = TRUE; + } + +- pipe_buffer_unmap(pipe, indexBuffer, transfer); ++ pipe_buffer_unmap(pipe, transfer); + } + + static void +diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile +new file mode 100644 +index 0000000..da8f9a2 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/Makefile +@@ -0,0 +1,34 @@ ++TOP = ../../../.. 
++include $(TOP)/configs/current ++ ++LIBNAME = nvc0 ++ ++C_SOURCES = \ ++ nvc0_buffer.c \ ++ nvc0_context.c \ ++ nvc0_draw.c \ ++ nvc0_formats.c \ ++ nvc0_miptree.c \ ++ nvc0_resource.c \ ++ nvc0_screen.c \ ++ nvc0_state.c \ ++ nvc0_state_validate.c \ ++ nvc0_surface.c \ ++ nvc0_tex.c \ ++ nvc0_transfer.c \ ++ nvc0_vbo.c \ ++ nvc0_program.c \ ++ nvc0_shader_state.c \ ++ nvc0_pc.c \ ++ nvc0_pc_print.c \ ++ nvc0_pc_emit.c \ ++ nvc0_tgsi_to_nc.c \ ++ nvc0_pc_optimize.c \ ++ nvc0_pc_regalloc.c \ ++ nvc0_push.c \ ++ nvc0_push2.c \ ++ nvc0_fence.c \ ++ nvc0_mm.c \ ++ nvc0_query.c ++ ++include ../../Makefile.template +diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript +new file mode 100644 +index 0000000..c49e0dd +--- /dev/null ++++ b/src/gallium/drivers/nvc0/SConscript +@@ -0,0 +1,36 @@ ++Import('*') ++ ++env = env.Clone() ++ ++nvc0 = env.ConvenienceLibrary( ++ target = 'nvc0', ++ source = [ ++ 'nvc0_buffer.c', ++ 'nvc0_context.c', ++ 'nvc0_draw.c', ++ 'nvc0_formats.c', ++ 'nvc0_miptree.c', ++ 'nvc0_resource.c', ++ 'nvc0_screen.c', ++ 'nvc0_state.c', ++ 'nvc0_state_validate.c', ++ 'nvc0_surface.c', ++ 'nvc0_tex.c', ++ 'nvc0_transfer.c', ++ 'nvc0_vbo.c', ++ 'nvc0_program.c', ++ 'nvc0_shader_state.c', ++ 'nvc0_pc.c', ++ 'nvc0_pc_print.c', ++ 'nvc0_pc_emit.c', ++ 'nvc0_tgsi_to_nc.c', ++ 'nvc0_pc_optimize.c', ++ 'nvc0_pc_regalloc.c', ++ 'nvc0_push.c', ++ 'nvc0_push2.c', ++ 'nvc0_fence.c', ++ 'nvc0_mm.c', ++ 'nvc0_query.c' ++ ]) ++ ++Export('nvc0') +diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h +new file mode 100644 +index 0000000..1bf2f80 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h +@@ -0,0 +1,142 @@ ++#ifndef NV50_DEFS_XML ++#define NV50_DEFS_XML ++ ++/* Autogenerated file, DO NOT EDIT manually! 
++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++ ++Copyright (C) 2006-2010 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. (koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro, curro_, currojerez) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included 
in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0 ++#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1 ++#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2 ++#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3 ++#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6 ++#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7 ++#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8 ++#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9 ++#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca ++#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb ++#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc ++#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd ++#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce ++#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf ++#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0 ++#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1 ++#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2 ++#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5 ++#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6 ++#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7 ++#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8 ++#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9 ++#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da ++#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db ++#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc ++#define 
NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd ++#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de ++#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df ++#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0 ++#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 ++#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6 ++#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7 ++#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8 ++#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9 ++#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea ++#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb ++#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec ++#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed ++#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee ++#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef ++#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 ++#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 ++#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 ++#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 ++#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 ++#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 ++#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 ++#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 ++#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8 ++#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9 ++#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa ++#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a ++#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 ++#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014 ++#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015 ++#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016 ++#define NV50_ZETA_FORMAT_UNK18 0x00000018 ++#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019 ++#define NV50_ZETA_FORMAT_UNK1D 0x0000001d ++#define NV50_ZETA_FORMAT_UNK1E 0x0000001e ++#define NV50_ZETA_FORMAT_UNK1F 0x0000001f ++#define NV50_QUERY__SIZE 0x00000010 ++#define NV50_QUERY_COUNTER 0x00000000 ++ ++#define NV50_QUERY_RES 0x00000004 ++ ++#define NV50_QUERY_TIME 0x00000008 
++ ++ ++#endif /* NV50_DEFS_XML */ +diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h +new file mode 100644 +index 0000000..9f83206 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h +@@ -0,0 +1,259 @@ ++#ifndef NV50_TEXTURE_XML ++#define NV50_TEXTURE_XML ++ ++/* Autogenerated file, DO NOT EDIT manually! ++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nv50_texture.xml ( 6871 bytes, from 2010-10-03 13:18:37) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++ ++Copyright (C) 2006-2010 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. (koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro, curro_, currojerez) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software 
without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++#define NV50_TIC_MAP_ZERO 0x00000000 ++#define NV50_TIC_MAP_C0 0x00000002 ++#define NV50_TIC_MAP_C1 0x00000003 ++#define NV50_TIC_MAP_C2 0x00000004 ++#define NV50_TIC_MAP_C3 0x00000005 ++#define NV50_TIC_MAP_ONE 0x00000007 ++#define NV50_TIC_TYPE_SNORM 0x00000001 ++#define NV50_TIC_TYPE_UNORM 0x00000002 ++#define NV50_TIC_TYPE_SINT 0x00000003 ++#define NV50_TIC_TYPE_UINT 0x00000004 ++#define NV50_TIC_TYPE_SSCALED 0x00000005 ++#define NV50_TIC_TYPE_USCALED 0x00000006 ++#define NV50_TIC_TYPE_FLOAT 0x00000007 ++#define NV50_TSC_WRAP_REPEAT 0x00000000 ++#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 ++#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 ++#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 ++#define NV50_TSC_WRAP_CLAMP 0x00000004 ++#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 ++#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 ++#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 ++#define NV50_TIC__SIZE 0x00000020 ++#define NV50_TIC_0 0x00000000 ++#define NV50_TIC_0_MAPA__MASK 0x38000000 ++#define NV50_TIC_0_MAPA__SHIFT 27 
++#define NV50_TIC_0_MAPB__MASK 0x07000000 ++#define NV50_TIC_0_MAPB__SHIFT 24 ++#define NV50_TIC_0_MAPG__MASK 0x00e00000 ++#define NV50_TIC_0_MAPG__SHIFT 21 ++#define NV50_TIC_0_MAPR__MASK 0x001c0000 ++#define NV50_TIC_0_MAPR__SHIFT 18 ++#define NV50_TIC_0_TYPE3__MASK 0x00038000 ++#define NV50_TIC_0_TYPE3__SHIFT 15 ++#define NV50_TIC_0_TYPE2__MASK 0x00007000 ++#define NV50_TIC_0_TYPE2__SHIFT 12 ++#define NV50_TIC_0_TYPE1__MASK 0x00000e00 ++#define NV50_TIC_0_TYPE1__SHIFT 9 ++#define NV50_TIC_0_TYPE0__MASK 0x000001c0 ++#define NV50_TIC_0_TYPE0__SHIFT 6 ++#define NV50_TIC_0_SWIZZLE__MASK 0x3ffc0000 ++#define NV50_TIC_0_FMT__MASK 0x0000003f ++#define NV50_TIC_0_FMT__SHIFT 0 ++#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 ++#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 ++#define NV50_TIC_0_FMT_32_32 0x00000004 ++#define NV50_TIC_0_FMT_32_8 0x00000005 ++#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 ++#define NV50_TIC_0_FMT_2_10_10_10 0x00000009 ++#define NV50_TIC_0_FMT_16_16 0x0000000c ++#define NV50_TIC_0_FMT_8_24 0x0000000d ++#define NV50_TIC_0_FMT_24_8 0x0000000e ++#define NV50_TIC_0_FMT_32 0x0000000f ++#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 ++#define NV50_TIC_0_FMT_5_5_5_1 0x00000013 ++#define NV50_TIC_0_FMT_1_5_5_5 0x00000014 ++#define NV50_TIC_0_FMT_5_6_5 0x00000015 ++#define NV50_TIC_0_FMT_6_5_5 0x00000016 ++#define NV50_TIC_0_FMT_8_8 0x00000018 ++#define NV50_TIC_0_FMT_16 0x0000001b ++#define NV50_TIC_0_FMT_8 0x0000001d ++#define NV50_TIC_0_FMT_4_4 0x0000001e ++#define NV50_TIC_0_FMT_UNK1F 0x0000001f ++#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020 ++#define NV50_TIC_0_FMT_10_11_11 0x00000021 ++#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022 ++#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023 ++#define NV50_TIC_0_FMT_DXT1 0x00000024 ++#define NV50_TIC_0_FMT_DXT3 0x00000025 ++#define NV50_TIC_0_FMT_DXT5 0x00000026 ++#define NV50_TIC_0_FMT_RGTC1 0x00000027 ++#define NV50_TIC_0_FMT_RGTC2 0x00000028 ++#define NV50_TIC_0_FMT_24_8_ZETA 0x00000029 ++#define 
NV50_TIC_0_FMT_8_24_ZETA 0x0000002a ++#define NV50_TIC_0_FMT_UNK2C_ZETA 0x0000002c ++#define NV50_TIC_0_FMT_UNK2D_ZETA 0x0000002d ++#define NV50_TIC_0_FMT_UNK2E_ZETA 0x0000002e ++#define NV50_TIC_0_FMT_32_ZETA 0x0000002f ++#define NV50_TIC_0_FMT_32_8_ZETA 0x00000030 ++#define NV50_TIC_0_FMT_16_ZETA 0x0000003a ++ ++#define NV50_TIC_1 0x00000004 ++#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff ++#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 ++ ++#define NV50_TIC_2 0x00000008 ++#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff ++#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 ++#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 ++#define NV50_TIC_2_TARGET__MASK 0x0003c000 ++#define NV50_TIC_2_TARGET__SHIFT 14 ++#define NV50_TIC_2_TARGET_1D 0x00000000 ++#define NV50_TIC_2_TARGET_2D 0x00004000 ++#define NV50_TIC_2_TARGET_3D 0x00008000 ++#define NV50_TIC_2_TARGET_CUBE 0x0000c000 ++#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 ++#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 ++#define NV50_TIC_2_TARGET_BUFFER 0x00018000 ++#define NV50_TIC_2_TARGET_RECT 0x0001c000 ++#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 ++#define NV50_TIC_2_TILE_MODE_LINEAR 0x00040000 ++#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 ++#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 ++#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 ++#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 ++#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 ++#define NV50_TIC_2_2D_UNK0258__SHIFT 28 ++#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 ++ ++#define NV50_TIC_3 0x0000000c ++#define NV50_TIC_3_PITCH__MASK 0xffffffff ++#define NV50_TIC_3_PITCH__SHIFT 0 ++ ++#define NV50_TIC_4 0x00000010 ++#define NV50_TIC_4_WIDTH__MASK 0xffffffff ++#define NV50_TIC_4_WIDTH__SHIFT 0 ++ ++#define NV50_TIC_5 0x00000014 ++#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 ++#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 ++#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 ++#define NV50_TIC_5_DEPTH__SHIFT 16 ++#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff ++#define NV50_TIC_5_HEIGHT__SHIFT 0 ++ 
++#define NV50_TIC_7 0x0000001c ++#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f ++#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 ++#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 ++#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 ++ ++#define NV50_TSC__SIZE 0x00000020 ++#define NV50_TSC_0 0x00000000 ++#define NV50_TSC_0_WRAPS__MASK 0x00000007 ++#define NV50_TSC_0_WRAPS__SHIFT 0 ++#define NV50_TSC_0_WRAPT__MASK 0x00000038 ++#define NV50_TSC_0_WRAPT__SHIFT 3 ++#define NV50_TSC_0_WRAPR__MASK 0x000001c0 ++#define NV50_TSC_0_WRAPR__SHIFT 6 ++#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 ++#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 ++#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 ++#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 ++#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 ++ ++#define NV50_TSC_1 0x00000004 ++#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 ++#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 ++#define NV50_TSC_1_MAGF__MASK 0x00000003 ++#define NV50_TSC_1_MAGF__SHIFT 0 ++#define NV50_TSC_1_MAGF_NEAREST 0x00000001 ++#define NV50_TSC_1_MAGF_LINEAR 0x00000002 ++#define NV50_TSC_1_MINF__MASK 0x00000030 ++#define NV50_TSC_1_MINF__SHIFT 4 ++#define NV50_TSC_1_MINF_NEAREST 0x00000010 ++#define NV50_TSC_1_MINF_LINEAR 0x00000020 ++#define NV50_TSC_1_MIPF__MASK 0x000000c0 ++#define NV50_TSC_1_MIPF__SHIFT 6 ++#define NV50_TSC_1_MIPF_NONE 0x00000040 ++#define NV50_TSC_1_MIPF_NEAREST 0x00000080 ++#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 ++#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 ++#define NV50_TSC_1_LOD_BIAS__SHIFT 12 ++ ++#define NV50_TSC_2 0x00000008 ++#define NV50_TSC_2_MIN_LOD__MASK 0x00000f00 ++#define NV50_TSC_2_MIN_LOD__SHIFT 8 ++#define NV50_TSC_2_MAX_LOD__MASK 0x00f00000 ++#define NV50_TSC_2_MAX_LOD__SHIFT 20 ++ ++#define NV50_TSC_4 0x00000010 ++#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff ++#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 ++ ++#define NV50_TSC_5 0x00000014 ++#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff ++#define 
NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 ++ ++#define NV50_TSC_6 0x00000018 ++#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff ++#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 ++ ++#define NV50_TSC_7 0x0000001c ++#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff ++#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 ++ ++ ++#endif /* NV50_TEXTURE_XML */ +diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h +new file mode 100644 +index 0000000..aebcd51 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h +@@ -0,0 +1,380 @@ ++#ifndef NVC0_2D_XML ++#define NVC0_2D_XML ++ ++/* Autogenerated file, DO NOT EDIT manually! ++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) ++- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) ++- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) ++- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) ++ ++Copyright (C) 2006-2010 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. 
(koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro, curro_, currojerez) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++ ++#define NVC0_2D_DST_FORMAT 0x00000200 ++ ++#define NVC0_2D_DST_LINEAR 0x00000204 ++ ++#define NVC0_2D_DST_TILE_MODE 0x00000208 ++ ++#define NVC0_2D_DST_DEPTH 0x0000020c ++ ++#define NVC0_2D_DST_LAYER 0x00000210 ++ ++#define NVC0_2D_DST_PITCH 0x00000214 ++ ++#define NVC0_2D_DST_WIDTH 0x00000218 ++ ++#define NVC0_2D_DST_HEIGHT 0x0000021c ++ ++#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220 ++ ++#define NVC0_2D_DST_ADDRESS_LOW 0x00000224 ++ ++#define NVC0_2D_UNK228 0x00000228 ++ ++#define NVC0_2D_SRC_FORMAT 0x00000230 ++ ++#define NVC0_2D_SRC_LINEAR 0x00000234 ++ ++#define NVC0_2D_SRC_TILE_MODE 0x00000238 ++ ++#define NVC0_2D_SRC_DEPTH 0x0000023c ++ ++#define NVC0_2D_SRC_LAYER 0x00000240 ++ ++#define NVC0_2D_SRC_PITCH 0x00000244 ++#define NVC0_2D_SRC_PITCH__MAX 0x00040000 ++ ++#define NVC0_2D_SRC_WIDTH 0x00000248 ++#define NVC0_2D_SRC_WIDTH__MAX 0x00010000 ++ ++#define NVC0_2D_SRC_HEIGHT 0x0000024c ++#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000 ++ ++#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250 ++ ++#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254 ++ ++#define NVC0_2D_UNK258 0x00000258 ++ ++#define NVC0_2D_UNK260 0x00000260 ++ ++#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264 ++ ++#define NVC0_2D_COND_ADDRESS_LOW 0x00000268 ++ ++#define NVC0_2D_COND_MODE 0x0000026c ++#define NVC0_2D_COND_MODE_NEVER 0x00000000 ++#define NVC0_2D_COND_MODE_ALWAYS 0x00000001 ++#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002 ++#define NVC0_2D_COND_MODE_EQUAL 0x00000003 ++#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004 ++ ++#define NVC0_2D_CLIP_X 0x00000280 ++ ++#define NVC0_2D_CLIP_Y 0x00000284 ++ ++#define NVC0_2D_CLIP_W 0x00000288 ++ ++#define NVC0_2D_CLIP_H 0x0000028c ++ ++#define NVC0_2D_CLIP_ENABLE 0x00000290 ++ 
++#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294 ++#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 ++#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 ++#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 ++#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 ++#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 ++#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 ++#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 ++ ++#define NVC0_2D_COLOR_KEY 0x00000298 ++ ++#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c ++ ++#define NVC0_2D_ROP 0x000002a0 ++ ++#define NVC0_2D_BETA1 0x000002a4 ++ ++#define NVC0_2D_BETA4 0x000002a8 ++ ++#define NVC0_2D_OPERATION 0x000002ac ++#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000 ++#define NVC0_2D_OPERATION_ROP_AND 0x00000001 ++#define NVC0_2D_OPERATION_BLEND_AND 0x00000002 ++#define NVC0_2D_OPERATION_SRCCOPY 0x00000003 ++#define NVC0_2D_OPERATION_UNK4 0x00000004 ++#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 ++#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006 ++ ++#define NVC0_2D_UNK2B0 0x000002b0 ++#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f ++#define NVC0_2D_UNK2B0_UNK0__SHIFT 0 ++#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00 ++#define NVC0_2D_UNK2B0_UNK1__SHIFT 8 ++ ++#define NVC0_2D_PATTERN_SELECT 0x000002b4 ++#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000 ++#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001 ++#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002 ++#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003 ++ ++#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 ++#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 ++ ++#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec ++#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 ++#define 
NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001 ++ ++#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002 ++ ++#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002 ++ ++#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040 ++#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff ++#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0 ++#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 ++#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8 ++#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 ++#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16 ++ ++#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020 ++#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f ++#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0 ++#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 ++#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5 ++#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 ++#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11 ++#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 ++#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16 ++#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 ++#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21 ++#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 ++#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27 ++ ++#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020 ++#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f ++#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 ++#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 ++#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 ++#define 
NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 ++#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 ++#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 ++#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 ++#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 ++#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 ++#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 ++#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 ++ ++#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) ++#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004 ++#define NVC0_2D_PATTERN_Y8__LEN 0x00000010 ++#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff ++#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0 ++#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 ++#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8 ++#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 ++#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16 ++#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000 ++#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24 ++ ++#define NVC0_2D_DRAW_SHAPE 0x00000580 ++#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000 ++#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001 ++#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 ++#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003 ++#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004 ++ ++#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584 ++ ++#define NVC0_2D_DRAW_COLOR 0x00000588 ++ ++#define NVC0_2D_UNK58C 0x0000058c ++#define NVC0_2D_UNK58C_0 0x00000001 ++#define NVC0_2D_UNK58C_1 0x00000010 ++#define NVC0_2D_UNK58C_2 0x00000100 ++#define NVC0_2D_UNK58C_3 0x00001000 ++ ++#define NVC0_2D_DRAW_POINT16 0x000005e0 ++#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff ++#define NVC0_2D_DRAW_POINT16_X__SHIFT 0 ++#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000 ++#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16 ++ ++#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) ++#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008 ++#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040 ++ ++#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) ++#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008 
++#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040 ++ ++#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800 ++ ++#define NVC0_2D_SIFC_FORMAT 0x00000804 ++ ++#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808 ++#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 ++#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 ++#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 ++ ++#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c ++ ++#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 ++#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 ++#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 ++#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 ++ ++#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 ++ ++#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 ++ ++#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c ++ ++#define NVC0_2D_SIFC_WIDTH 0x00000838 ++ ++#define NVC0_2D_SIFC_HEIGHT 0x0000083c ++ ++#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840 ++ ++#define NVC0_2D_SIFC_DX_DU_INT 0x00000844 ++ ++#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848 ++ ++#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c ++ ++#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850 ++ ++#define NVC0_2D_SIFC_DST_X_INT 0x00000854 ++ ++#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858 ++ ++#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c ++ ++#define NVC0_2D_SIFC_DATA 0x00000860 ++ ++#define NVC0_2D_UNK0870 0x00000870 ++ ++#define NVC0_2D_UNK0880 0x00000880 ++ ++#define NVC0_2D_UNK0884 0x00000884 ++ ++#define NVC0_2D_UNK0888 0x00000888 ++ ++#define NVC0_2D_BLIT_CONTROL 0x0000088c ++#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 ++#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 ++#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 ++#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 ++#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 ++#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4 ++#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 ++#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 ++ ++#define 
NVC0_2D_BLIT_DST_X 0x000008b0 ++ ++#define NVC0_2D_BLIT_DST_Y 0x000008b4 ++ ++#define NVC0_2D_BLIT_DST_W 0x000008b8 ++ ++#define NVC0_2D_BLIT_DST_H 0x000008bc ++ ++#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0 ++ ++#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4 ++ ++#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8 ++ ++#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc ++ ++#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0 ++ ++#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4 ++ ++#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8 ++ ++#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc ++ ++ ++#endif /* NVC0_2D_XML */ +diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h +new file mode 100644 +index 0000000..61932ff +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h +@@ -0,0 +1,1183 @@ ++#ifndef NVC0_3D_XML ++#define NVC0_3D_XML ++ ++/* Autogenerated file, DO NOT EDIT manually! ++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07) ++- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) ++- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) ++- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) ++- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) ++- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) ++- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) ++ ++Copyright (C) 2006-2011 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. 
(koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++ ++#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104 ++#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108 ++#define NVC0_3D_NOTIFY 0x0000010c ++ ++#define NVC0_3D_SERIALIZE 0x00000110 ++ ++#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 ++ ++#define NVC0_3D_TESS_MODE 0x00000320 ++#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f ++#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 ++#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000 ++#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001 ++#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002 ++#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0 ++#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4 ++#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000 ++#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010 ++#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020 ++#define NVC0_3D_TESS_MODE_CW 0x00000100 ++#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200 ++ ++#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0)) ++#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004 ++#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004 ++ ++#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0)) ++#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004 ++#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002 ++ ++#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c ++ ++#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0)) ++#define NVC0_3D_TFB__ESIZE 0x00000020 ++#define NVC0_3D_TFB__LEN 0x00000004 ++ ++#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0)) ++ ++#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0)) ++ ++#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0)) ++ ++#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) ++ ++#define 
NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) ++ ++#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) ++ ++#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) ++ ++#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) ++ ++#define NVC0_3D_TFB_ENABLE 0x00000744 ++ ++#define NVC0_3D_LOCAL_BASE 0x0000077c ++ ++#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790 ++ ++#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794 ++ ++#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798 ++ ++#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c ++ ++#define NVC0_3D_RT(i0) (0x00000800 + 0x20*(i0)) ++#define NVC0_3D_RT__ESIZE 0x00000020 ++#define NVC0_3D_RT__LEN 0x00000008 ++ ++#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) ++ ++#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) ++ ++#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) ++ ++#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) ++ ++#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) ++ ++#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) ++#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 ++#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 ++#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 ++#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 ++#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 ++ ++#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) ++#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff ++#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 ++#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 ++ ++#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) ++ ++#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 ++#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 ++#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 
0x00000020 ++#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 ++#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 ++#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) ++#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 ++#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) ++#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 ++#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010 ++#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff ++#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0 ++#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 ++#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16 ++ ++#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) ++#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010 ++#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010 ++#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff ++#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0 ++#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000 ++#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16 ++ ++#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) ++#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 ++#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 ++ ++#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) ++#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 ++#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 ++ ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 
0xffff0000 ++#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 ++ ++#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) ++#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 ++#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 ++#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff ++#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 ++#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 ++#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 ++ ++#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) ++#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 ++#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 ++#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff ++#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 ++#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 ++#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 ++ ++#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) ++#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 ++#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004 ++#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff ++#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0 ++#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 ++#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 ++ ++#define NVC0_3D_COUNTER_ENABLE 0x00000d68 ++#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 ++#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 ++#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 ++#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 ++#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 ++#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 ++#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 ++#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 ++#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 ++#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 ++#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 ++#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 ++#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 ++#define NVC0_3D_COUNTER_ENABLE_UNK0D 
0x00002000 ++#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 ++#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 ++ ++#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 ++ ++#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 ++ ++#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) ++#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004 ++#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004 ++ ++#define NVC0_3D_CLEAR_DEPTH 0x00000d90 ++ ++#define NVC0_3D_CLEAR_STENCIL 0x00000da0 ++ ++#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 ++ ++#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 ++ ++#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 ++ ++#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 ++ ++#define NVC0_3D_PATCH_VERTICES 0x00000dcc ++ ++#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8 ++ ++#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc ++ ++#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) ++#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010 ++#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010 ++ ++#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) ++#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010 ++#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010 ++#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff ++#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0 ++#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 ++#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16 ++ ++#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) ++#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010 ++#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010 ++#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff ++#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0 ++#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 ++#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16 ++ ++#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54 ++ ++#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58 ++ ++#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c ++ ++#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 ++ ++#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 ++ ++#define 
NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) ++#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 ++#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 ++ ++#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) ++#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 ++#define NVC0_3D_MSAA_MASK__LEN 0x00000004 ++ ++#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc ++ ++#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0 ++ ++#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0 ++ ++#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4 ++ ++#define NVC0_3D_ZETA_FORMAT 0x00000fe8 ++ ++#define NVC0_3D_ZETA_TILE_MODE 0x00000fec ++ ++#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0 ++ ++#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 ++#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 ++#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 ++#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff ++#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 ++ ++#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8 ++#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 ++#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 ++#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff ++#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 ++ ++#define NVC0_3D_VERTEX_ID 0x00001118 ++ ++#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c ++#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff ++#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 ++#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 ++#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 ++#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 ++#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 ++#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 ++#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 ++#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 ++#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 ++#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 
0x00010000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 ++#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 ++ ++#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) ++#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 ++#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004 ++ ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000 ++#define 
NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000 ++#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000 ++ ++#define NVC0_3D_RT_CONTROL 0x0000121c ++#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f ++#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0 ++#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070 ++#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4 ++#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380 ++#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7 ++#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00 ++#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10 ++#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000 ++#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13 ++#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000 ++#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16 ++#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000 ++#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19 ++#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000 ++#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22 ++#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000 ++#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25 ++ ++#define NVC0_3D_ZETA_HORIZ 0x00001228 ++ ++#define NVC0_3D_ZETA_VERT 0x0000122c ++ ++#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230 ++#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff ++#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 ++#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000 ++ ++#define NVC0_3D_LINKED_TSC 0x00001234 ++ ++#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c ++ ++#define NVC0_3D_FP_RESULT_COUNT 0x00001298 ++ ++#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc ++ ++#define NVC0_3D_D3D_FILL_MODE 0x000012d0 ++#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001 
++#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 ++#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003 ++ ++#define NVC0_3D_SHADE_MODEL 0x000012d4 ++#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00 ++#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01 ++ ++#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4 ++ ++#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8 ++ ++#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec ++ ++#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300 ++#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 ++#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 ++#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff ++#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 ++ ++#define NVC0_3D_VB_ELEMENT_U8 0x00001304 ++#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff ++#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0 ++#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 ++#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8 ++#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 ++#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16 ++#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 ++#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24 ++ ++#define NVC0_3D_D3D_CULL_MODE 0x00001308 ++#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001 ++#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002 ++#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003 ++ ++#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c ++#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 ++#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201 ++#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 ++#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 ++#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 ++#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 ++#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 ++#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 ++ ++#define NVC0_3D_ALPHA_TEST_REF 0x00001310 ++ ++#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314 ++#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 ++#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201 ++#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 
0x00000202 ++#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 ++#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 ++#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 ++#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 ++#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 ++ ++#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318 ++#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 ++#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff ++ ++#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) ++#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004 ++#define NVC0_3D_BLEND_COLOR__LEN 0x00000004 ++ ++#define NVC0_3D_TSC_FLUSH 0x00001330 ++#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 ++#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 ++#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 ++ ++#define NVC0_3D_TIC_FLUSH 0x00001334 ++#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 ++#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 ++#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 ++ ++#define NVC0_3D_TEX_CACHE_CTL 0x00001338 ++#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 ++#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 ++ ++#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 ++#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 ++#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 ++#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008 ++#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a ++#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b ++ ++#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344 ++ ++#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348 ++ ++#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c ++#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 ++#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 ++#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 ++#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a ++#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b ++ ++#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 ++ ++#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 
++ ++#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) ++#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 ++#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 ++ ++#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 ++ ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 ++#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 ++#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 ++#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 ++#define 
NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 ++#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 ++ ++#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 ++ ++#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 ++ ++#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c ++ ++#define NVC0_3D_DRAW_TFB_BASE 0x000013a4 ++ ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 ++#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 ++ ++#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac ++#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 ++#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 ++ ++#define NVC0_3D_LINE_WIDTH 0x000013b0 ++ ++#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 ++#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 ++#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 ++ ++#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c ++ ++#define NVC0_3D_VB_ELEMENT_BASE 0x00001434 ++ ++#define NVC0_3D_VB_INSTANCE_BASE 0x00001438 ++ ++#define NVC0_3D_CODE_CB_FLUSH 0x00001440 ++ ++#define NVC0_3D_CLIPID_HEIGHT 0x00001504 ++#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 ++ ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008 ++#define 
NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040 ++#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080 ++ ++#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514 ++ ++#define NVC0_3D_POINT_SIZE 0x00001518 ++ ++#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 ++ ++#define NVC0_3D_COUNTER_RESET 0x00001530 ++#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 ++#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 ++#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 ++#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004 ++#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 ++#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 ++#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 ++#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 ++#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 ++#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 ++#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 ++#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 ++#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a ++#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b ++#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c ++#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d ++#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e ++#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f ++ ++#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534 ++ ++#define NVC0_3D_ZETA_ENABLE 0x00001538 ++ ++#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c ++#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 ++#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 ++ ++#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550 ++ ++#define NVC0_3D_COND_ADDRESS_LOW 0x00001554 ++ ++#define NVC0_3D_COND_MODE 0x00001558 ++#define NVC0_3D_COND_MODE_NEVER 0x00000000 ++#define NVC0_3D_COND_MODE_ALWAYS 0x00000001 ++#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002 ++#define NVC0_3D_COND_MODE_EQUAL 0x00000003 ++#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004 ++ ++#define NVC0_3D_TSC_ADDRESS_HIGH 
0x0000155c ++ ++#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560 ++#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 ++ ++#define NVC0_3D_TSC_LIMIT 0x00001564 ++#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff ++ ++#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c ++ ++#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570 ++ ++#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574 ++ ++#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578 ++ ++#define NVC0_3D_TIC_LIMIT 0x0000157c ++ ++#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 ++ ++#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 ++#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 ++#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 ++#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 ++#define 
NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 ++ ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 ++#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 ++ ++#define NVC0_3D_CSAA_ENABLE 0x000015b4 ++ ++#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8 ++ ++#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc ++ ++#define NVC0_3D_GP_BUILTIN_RESULT_EN 0x000015cc ++#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER 0x00010000 ++ ++#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 ++#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000 ++#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001 ++#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002 ++#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003 ++#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008 ++#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009 ++#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a ++ ++#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b ++#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c ++#define 
NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d ++#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 ++ ++#define NVC0_3D_VERTEX_END_D3D 0x000015d8 ++#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001 ++#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002 ++ ++#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4 ++ ++#define NVC0_3D_VB_ELEMENT_U32 0x000015e8 ++ ++#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec ++#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 ++#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 ++#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff ++#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 ++ ++#define NVC0_3D_VB_ELEMENT_U16 0x000015f0 ++#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff ++#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0 ++#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 ++#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16 ++ ++#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4 ++ ++#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 ++ ++#define NVC0_3D_POINT_COORD_REPLACE 0x00001604 ++#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff ++#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 ++ ++#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 ++ ++#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c ++ ++#define NVC0_3D_VERTEX_END_GL 0x00001614 ++#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001 ++#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002 ++ ++#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 ++#define 
NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d ++#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e ++#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 ++#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 ++ ++#define NVC0_3D_VERTEX_DATA 0x00001640 ++ ++#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 ++ ++#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648 ++ ++#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c ++#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 ++#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 ++#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 ++#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 ++ ++#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658 ++ ++#define NVC0_3D_POINT_RASTER_RULES 0x0000165c ++#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 ++#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 ++ ++#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 ++ ++#define NVC0_3D_TEX_MISC 0x00001664 ++#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 ++ ++#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c ++ ++#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680 ++ ++#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684 ++ ++#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 ++ ++#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c ++ ++#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) ++#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 ++#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 ++ 
++#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) ++#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 ++#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 ++ ++#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc ++ ++#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0 ++ ++#define NVC0_3D_UNK17BC_LIMIT 0x000017c4 ++ ++#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8 ++ ++#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc ++ ++#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0 ++ ++#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4 ++ ++#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8 ++ ++#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc ++ ++#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0 ++ ++#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 ++#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020 ++ ++#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910 ++ ++#define NVC0_3D_CULL_FACE_ENABLE 0x00001918 ++ ++#define NVC0_3D_FRONT_FACE 0x0000191c ++#define NVC0_3D_FRONT_FACE_CW 0x00000900 ++#define NVC0_3D_FRONT_FACE_CCW 0x00000901 ++ ++#define NVC0_3D_CULL_FACE 0x00001920 ++#define NVC0_3D_CULL_FACE_FRONT 0x00000404 ++#define NVC0_3D_CULL_FACE_BACK 0x00000405 ++#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 ++ ++#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c ++ ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 ++ ++#define 
NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c ++ ++#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 ++#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 ++#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 ++#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 ++ ++#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c ++#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 ++#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010 ++ ++#define NVC0_3D_CLIPID_ENABLE 0x0000197c ++ ++#define NVC0_3D_CLIPID_WIDTH 0x00001980 ++#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000 ++#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040 ++ ++#define NVC0_3D_CLIPID_ID 0x00001984 ++ ++#define NVC0_3D_FP_CONTROL 0x000019a8 ++#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001 ++#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100 ++#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000 ++ ++#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc ++ ++#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4 ++ ++#define NVC0_3D_LOGIC_OP 0x000019c8 ++#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500 ++#define NVC0_3D_LOGIC_OP_AND 0x00001501 ++#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502 ++#define NVC0_3D_LOGIC_OP_COPY 0x00001503 ++#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504 ++#define NVC0_3D_LOGIC_OP_NOOP 0x00001505 ++#define NVC0_3D_LOGIC_OP_XOR 0x00001506 ++#define NVC0_3D_LOGIC_OP_OR 0x00001507 ++#define NVC0_3D_LOGIC_OP_NOR 0x00001508 ++#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509 ++#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a ++#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b ++#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c ++#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d ++#define NVC0_3D_LOGIC_OP_NAND 0x0000150e ++#define NVC0_3D_LOGIC_OP_SET 0x0000150f ++ ++#define NVC0_3D_CLEAR_BUFFERS 0x000019d0 ++#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001 ++#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002 ++#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004 ++#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008 ++#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010 ++#define NVC0_3D_CLEAR_BUFFERS_A 
0x00000020 ++#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 ++#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6 ++#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 ++#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 ++ ++#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) ++#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 ++#define NVC0_3D_COLOR_MASK__LEN 0x00000008 ++#define NVC0_3D_COLOR_MASK_R 0x0000000f ++#define NVC0_3D_COLOR_MASK_G 0x000000f0 ++#define NVC0_3D_COLOR_MASK_B 0x00000f00 ++#define NVC0_3D_COLOR_MASK_A 0x0000f000 ++ ++#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00 ++ ++#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04 ++ ++#define NVC0_3D_QUERY_SEQUENCE 0x00001b08 ++ ++#define NVC0_3D_QUERY_GET 0x00001b0c ++#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 ++#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 ++#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 ++#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 ++#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 ++#define NVC0_3D_QUERY_GET_FENCE 0x00000010 ++#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 ++#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 ++#define NVC0_3D_QUERY_GET_UNK8 0x00000100 ++#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 ++#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 ++#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 ++#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 ++#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 ++#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 ++#define NVC0_3D_QUERY_GET_INTR 0x00100000 ++#define NVC0_3D_QUERY_GET_UNK21 0x00200000 ++#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 ++#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 ++#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 ++#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 ++#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 ++#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 ++#define NVC0_3D_QUERY_GET_SHORT 0x10000000 ++ ++#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) 
++#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 ++#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020 ++#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff ++#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 ++#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000 ++ ++#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 ++#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020 ++ ++#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 ++#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020 ++ ++#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 ++#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020 ++ ++#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) ++#define NVC0_3D_IBLEND__ESIZE 0x00000020 ++#define NVC0_3D_IBLEND__LEN 0x00000008 ++ ++#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) ++#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 ++#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 ++#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 ++#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a ++#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b ++ ++#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) ++ ++#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) ++ ++#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) ++#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 ++#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 ++#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 ++#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a ++#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b ++ ++#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) ++ ++#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) 
(0x00001e18 + 0x20*(i0)) ++ ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020 ++ ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0)) ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 ++#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020 ++ ++#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0)) ++#define NVC0_3D_SP__ESIZE 0x00000040 ++#define NVC0_3D_SP__LEN 0x00000006 ++ ++#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0)) ++#define NVC0_3D_SP_SELECT_ENABLE 0x00000001 ++#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070 ++#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4 ++#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000 ++#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010 ++#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020 ++#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030 ++#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040 ++#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050 ++ ++#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0)) ++ ++#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0)) ++ ++#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) ++#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 ++#define NVC0_3D_TEX_LIMITS__LEN 0x00000005 ++ ++#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) ++#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 ++#define NVC0_3D_FIRMWARE__LEN 0x00000020 ++ ++#define NVC0_3D_CB_SIZE 0x00002380 ++ ++#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 ++ ++#define NVC0_3D_CB_ADDRESS_LOW 0x00002388 ++ ++#define NVC0_3D_CB_POS 0x0000238c ++ ++#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0)) ++#define NVC0_3D_CB_DATA__ESIZE 0x00000004 ++#define NVC0_3D_CB_DATA__LEN 0x00000010 ++ ++#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0)) ++#define NVC0_3D_BIND_TSC__ESIZE 0x00000020 ++#define NVC0_3D_BIND_TSC__LEN 0x00000005 ++#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001 ++#define 
NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0 ++#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4 ++#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000 ++#define NVC0_3D_BIND_TSC_TSC__SHIFT 12 ++ ++#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0)) ++#define NVC0_3D_BIND_TIC__ESIZE 0x00000020 ++#define NVC0_3D_BIND_TIC__LEN 0x00000005 ++#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001 ++#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe ++#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1 ++#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00 ++#define NVC0_3D_BIND_TIC_TIC__SHIFT 9 ++ ++#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0)) ++#define NVC0_3D_CB_BIND__ESIZE 0x00000020 ++#define NVC0_3D_CB_BIND__LEN 0x00000005 ++#define NVC0_3D_CB_BIND_VALID 0x00000001 ++#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0 ++#define NVC0_3D_CB_BIND_INDEX__SHIFT 4 ++ ++#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 ++ ++#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) ++#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 ++#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 ++ ++#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 ++ ++#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820 ++ ++#define NVC0_3D_BLEND_ENABLES 0x00003858 ++ ++#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868 ++#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 ++#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 ++#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 ++ ++#define NVC0_3D_POLYGON_MODE_BACK 0x00003870 ++#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00 ++#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01 ++#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02 ++ ++#define NVC0_3D_GP_SELECT 0x00003878 ++ ++#define NVC0_3D_TEP_SELECT 0x00003880 ++ ++ ++#endif /* NVC0_3D_XML */ +diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h +new file mode 100644 +index 0000000..84b1522 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h +@@ -0,0 +1,98 @@ ++#ifndef NV_3DDEFS_XML 
++#define NV_3DDEFS_XML ++ ++/* Autogenerated file, DO NOT EDIT manually! ++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) ++- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) ++- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) ++- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) ++- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) ++ ++Copyright (C) 2006-2010 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. (koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro, curro_, currojerez) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software without 
restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 ++#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 ++#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 ++#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 ++#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 ++#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 ++#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 ++#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 ++#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 ++#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 ++#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 ++#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 ++#define 
NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 ++ ++#endif /* NV_3DDEFS_XML */ +diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c +new file mode 100644 +index 0000000..ea3e642 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_buffer.c +@@ -0,0 +1,489 @@ ++ ++#include "util/u_inlines.h" ++#include "util/u_memory.h" ++#include "util/u_math.h" ++ ++#define NOUVEAU_NVC0 ++#include "nouveau/nouveau_screen.h" ++#include "nouveau/nouveau_winsys.h" ++#undef NOUVEAU_NVC0 ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++ ++struct nvc0_transfer { ++ struct pipe_transfer base; ++}; ++ ++static INLINE struct nvc0_transfer * ++nvc0_transfer(struct pipe_transfer *transfer) ++{ ++ return (struct nvc0_transfer *)transfer; ++} ++ ++static INLINE boolean ++nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, ++ unsigned domain) ++{ ++ if (domain == NOUVEAU_BO_VRAM) { ++ buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo, ++ &buf->offset); ++ if (!buf->bo) ++ return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART); ++ } else ++ if (domain == NOUVEAU_BO_GART) { ++ buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo, ++ &buf->offset); ++ if (!buf->bo) ++ return FALSE; ++ } ++ if (domain != NOUVEAU_BO_GART) { ++ if (!buf->data) { ++ buf->data = MALLOC(buf->base.width0); ++ if (!buf->data) ++ return FALSE; ++ } ++ } ++ buf->domain = domain; ++ return TRUE; ++} ++ ++static INLINE void ++release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) ++{ ++ if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) { ++ nvc0_fence_sched_release(fence, *mm); ++ } else { ++ nvc0_mm_free(*mm); ++ } ++ (*mm) = NULL; ++} ++ ++static INLINE boolean ++nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, ++ unsigned domain) ++{ ++ nouveau_bo_ref(NULL, &buf->bo); ++ ++ if (buf->mm) ++ release_allocation(&buf->mm, buf->fence); ++ ++ return 
nvc0_buffer_allocate(screen, buf, domain);
++}
++
++/* Destroy a buffer resource: drop the bo reference, release the
++ * sub-allocation (release_allocation() defers the free while res->fence is
++ * not signalled), free the system memory copy unless it is user memory, and
++ * finally free the resource struct itself.
++ */
++static void
++nvc0_buffer_destroy(struct pipe_screen *pscreen,
++                    struct pipe_resource *presource)
++{
++   struct nvc0_resource *res = nvc0_resource(presource);
++
++   nouveau_bo_ref(NULL, &res->bo);
++
++   if (res->mm)
++      release_allocation(&res->mm, res->fence);
++
++   if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
++      FREE(res->data);
++
++   FREE(res);
++}
++
++/* Maybe just migrate to GART right away if we actually need to do this. */
++/* Read @size bytes at offset @start of a VRAM buffer back into buf->data,
++ * going through a temporary GART bounce allocation.
++ *
++ * Returns TRUE on success (and clears NVC0_BUFFER_STATUS_DIRTY), FALSE if the
++ * bounce buffer cannot be allocated or mapped.
++ */
++boolean
++nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
++                     unsigned start, unsigned size)
++{
++   struct nvc0_mm_allocation *mm;
++   struct nouveau_bo *bounce = NULL;
++   uint32_t offset;
++
++   assert(buf->domain == NOUVEAU_BO_VRAM);
++
++   mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
++   if (!bounce)
++      return FALSE;
++
++   nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART,
++                         buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
++                         size);
++
++   if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD)) {
++      /* Do not leak the bounce buffer. The copy queued above may still be
++       * pending, so hand the sub-allocation to release_allocation() with the
++       * current screen fence instead of freeing it immediately.
++       */
++      nouveau_bo_ref(NULL, &bounce);
++      if (mm)
++         release_allocation(&mm, nvc0->screen->fence.current);
++      return FALSE;
++   }
++   memcpy(buf->data + start, bounce->map, size);
++   nouveau_bo_unmap(bounce);
++
++   buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
++
++   nouveau_bo_ref(NULL, &bounce);
++   if (mm)
++      nvc0_mm_free(mm);
++   return TRUE;
++}
++
++/* Write @size bytes of buf->data, starting at @start, to the buffer's bo.
++ * Transfers of at most 192 bytes are pushed with nvc0_m2mf_push_linear();
++ * larger ones are staged in a temporary GART bounce allocation and copied.
++ *
++ * Returns TRUE on success, FALSE if the bounce buffer cannot be allocated
++ * or mapped.
++ */
++static boolean
++nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
++                   unsigned start, unsigned size)
++{
++   struct nvc0_mm_allocation *mm;
++   struct nouveau_bo *bounce = NULL;
++   uint32_t offset;
++
++   if (size <= 192) {
++      nvc0_m2mf_push_linear(nvc0, buf->bo, buf->domain, buf->offset + start,
++                            size, buf->data + start);
++      return TRUE;
++   }
++
++   mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
++   if (!bounce)
++      return FALSE;
++
++   if (nouveau_bo_map_range(bounce, offset, size,
++                            NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC)) {
++      /* Nothing has been queued against the bounce buffer yet, so it can be
++       * released right away; never memcpy through an unmapped bo.
++       */
++      nouveau_bo_ref(NULL, &bounce);
++      if (mm)
++         nvc0_mm_free(mm);
++      return FALSE;
++   }
++   memcpy(bounce->map, buf->data + start, size);
++   nouveau_bo_unmap(bounce);
++
++   nvc0_m2mf_copy_linear(nvc0, buf->bo,
buf->offset + start, NOUVEAU_BO_VRAM, ++ bounce, offset, NOUVEAU_BO_GART, size); ++ ++ nouveau_bo_ref(NULL, &bounce); ++ if (mm) ++ release_allocation(&mm, nvc0->screen->fence.current); ++ ++ if (start == 0 && size == buf->base.width0) ++ buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; ++ return TRUE; ++} ++ ++static struct pipe_transfer * ++nvc0_buffer_transfer_get(struct pipe_context *pipe, ++ struct pipe_resource *resource, ++ unsigned level, ++ unsigned usage, ++ const struct pipe_box *box) ++{ ++ struct nvc0_resource *buf = nvc0_resource(resource); ++ struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer); ++ if (!xfr) ++ return NULL; ++ ++ xfr->base.resource = resource; ++ xfr->base.box.x = box->x; ++ xfr->base.box.width = box->width; ++ xfr->base.usage = usage; ++ ++ if (buf->domain == NOUVEAU_BO_VRAM) { ++ if (usage & PIPE_TRANSFER_READ) { ++ if (buf->status & NVC0_BUFFER_STATUS_DIRTY) ++ nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); ++ } ++ } ++ ++ return &xfr->base; ++} ++ ++static void ++nvc0_buffer_transfer_destroy(struct pipe_context *pipe, ++ struct pipe_transfer *transfer) ++{ ++ struct nvc0_resource *buf = nvc0_resource(transfer->resource); ++ struct nvc0_transfer *xfr = nvc0_transfer(transfer); ++ ++ if (xfr->base.usage & PIPE_TRANSFER_WRITE) { ++ /* writing is worse */ ++ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000); ++ ++ if (buf->domain == NOUVEAU_BO_VRAM) { ++ nvc0_buffer_upload(nvc0_context(pipe), buf, ++ transfer->box.x, transfer->box.width); ++ } ++ ++ if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER | ++ PIPE_BIND_INDEX_BUFFER))) ++ nvc0_context(pipe)->vbo_dirty = TRUE; ++ } ++ ++ FREE(xfr); ++} ++ ++static INLINE boolean ++nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw) ++{ ++ if (rw == PIPE_TRANSFER_READ) { ++ if (!buf->fence_wr) ++ return TRUE; ++ if (!nvc0_fence_wait(buf->fence_wr)) ++ return FALSE; ++ } else { ++ if (!buf->fence) ++ return TRUE; ++ if 
(!nvc0_fence_wait(buf->fence)) ++ return FALSE; ++ ++ nvc0_fence_reference(&buf->fence, NULL); ++ } ++ nvc0_fence_reference(&buf->fence_wr, NULL); ++ ++ return TRUE; ++} ++ ++static INLINE boolean ++nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw) ++{ ++ if (rw == PIPE_TRANSFER_READ) ++ return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr)); ++ else ++ return (buf->fence && !nvc0_fence_signalled(buf->fence)); ++} ++ ++static void * ++nvc0_buffer_transfer_map(struct pipe_context *pipe, ++ struct pipe_transfer *transfer) ++{ ++ struct nvc0_transfer *xfr = nvc0_transfer(transfer); ++ struct nvc0_resource *buf = nvc0_resource(transfer->resource); ++ struct nouveau_bo *bo = buf->bo; ++ uint8_t *map; ++ int ret; ++ uint32_t offset = xfr->base.box.x; ++ uint32_t flags; ++ ++ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250); ++ ++ if (buf->domain != NOUVEAU_BO_GART) ++ return buf->data + offset; ++ ++ if (buf->mm) ++ flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR; ++ else ++ flags = nouveau_screen_transfer_flags(xfr->base.usage); ++ ++ offset += buf->offset; ++ ++ ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags); ++ if (ret) ++ return NULL; ++ map = bo->map; ++ ++ /* Unmap right now. Since multiple buffers can share a single nouveau_bo, ++ * not doing so might make future maps fail or trigger "reloc while mapped" ++ * errors. For now, mappings to userspace are guaranteed to be persistent. 
++ */ ++ nouveau_bo_unmap(bo); ++ ++ if (buf->mm) { ++ if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { ++ if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) ++ return NULL; ++ } else ++ if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { ++ nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); ++ } ++ } ++ return map; ++} ++ ++ ++ ++static void ++nvc0_buffer_transfer_flush_region(struct pipe_context *pipe, ++ struct pipe_transfer *transfer, ++ const struct pipe_box *box) ++{ ++ struct nvc0_resource *res = nvc0_resource(transfer->resource); ++ struct nouveau_bo *bo = res->bo; ++ unsigned offset = res->offset + transfer->box.x + box->x; ++ ++ /* not using non-snoop system memory yet, no need for cflush */ ++ if (1) ++ return; ++ ++ /* XXX: maybe need to upload for VRAM buffers here */ ++ ++ nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); ++} ++ ++static void ++nvc0_buffer_transfer_unmap(struct pipe_context *pipe, ++ struct pipe_transfer *transfer) ++{ ++ /* we've called nouveau_bo_unmap right after map */ ++} ++ ++const struct u_resource_vtbl nvc0_buffer_vtbl = ++{ ++ u_default_resource_get_handle, /* get_handle */ ++ nvc0_buffer_destroy, /* resource_destroy */ ++ NULL, /* is_resource_referenced */ ++ nvc0_buffer_transfer_get, /* get_transfer */ ++ nvc0_buffer_transfer_destroy, /* transfer_destroy */ ++ nvc0_buffer_transfer_map, /* transfer_map */ ++ nvc0_buffer_transfer_flush_region, /* transfer_flush_region */ ++ nvc0_buffer_transfer_unmap, /* transfer_unmap */ ++ u_default_transfer_inline_write /* transfer_inline_write */ ++}; ++ ++struct pipe_resource * ++nvc0_buffer_create(struct pipe_screen *pscreen, ++ const struct pipe_resource *templ) ++{ ++ struct nvc0_screen *screen = nvc0_screen(pscreen); ++ struct nvc0_resource *buffer; ++ boolean ret; ++ ++ buffer = CALLOC_STRUCT(nvc0_resource); ++ if (!buffer) ++ return NULL; ++ ++ buffer->base = *templ; ++ buffer->vtbl = &nvc0_buffer_vtbl; ++ 
pipe_reference_init(&buffer->base.reference, 1); ++ buffer->base.screen = pscreen; ++ ++ if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) ++ ret = nvc0_buffer_allocate(screen, buffer, 0); ++ else ++ ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART); ++ ++ if (ret == FALSE) ++ goto fail; ++ ++ return &buffer->base; ++ ++fail: ++ FREE(buffer); ++ return NULL; ++} ++ ++ ++struct pipe_resource * ++nvc0_user_buffer_create(struct pipe_screen *pscreen, ++ void *ptr, ++ unsigned bytes, ++ unsigned bind) ++{ ++ struct nvc0_resource *buffer; ++ ++ buffer = CALLOC_STRUCT(nvc0_resource); ++ if (!buffer) ++ return NULL; ++ ++ pipe_reference_init(&buffer->base.reference, 1); ++ buffer->vtbl = &nvc0_buffer_vtbl; ++ buffer->base.screen = pscreen; ++ buffer->base.format = PIPE_FORMAT_R8_UNORM; ++ buffer->base.usage = PIPE_USAGE_IMMUTABLE; ++ buffer->base.bind = bind; ++ buffer->base.width0 = bytes; ++ buffer->base.height0 = 1; ++ buffer->base.depth0 = 1; ++ ++ buffer->data = ptr; ++ buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY; ++ ++ return &buffer->base; ++} ++ ++/* Like download, but for GART buffers. Merge ? */ ++static INLINE boolean ++nvc0_buffer_data_fetch(struct nvc0_resource *buf, ++ struct nouveau_bo *bo, unsigned offset, unsigned size) ++{ ++ if (!buf->data) { ++ buf->data = MALLOC(size); ++ if (!buf->data) ++ return FALSE; ++ } ++ if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD)) ++ return FALSE; ++ memcpy(buf->data, bo->map, size); ++ nouveau_bo_unmap(bo); ++ ++ return TRUE; ++} ++ ++/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. 
*/
++/* Returns TRUE once @buf lives in @new_domain and FALSE on failure or for an
++ * unsupported combination of old and new domain. Handled transitions:
++ *  - system memory (domain 0) -> GART: allocate, then memcpy buf->data in
++ *  - GART <-> VRAM: allocate new storage and copy with the M2MF engine
++ *  - system memory (domain 0) -> VRAM: allocate, then nvc0_buffer_upload()
++ */
++boolean
++nvc0_buffer_migrate(struct nvc0_context *nvc0,
++                    struct nvc0_resource *buf, const unsigned new_domain)
++{
++   struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
++   struct nouveau_bo *bo;
++   const unsigned old_domain = buf->domain;
++   unsigned size = buf->base.width0;
++   unsigned offset;
++   int ret;
++
++   assert(new_domain != old_domain);
++
++   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
++      if (!nvc0_buffer_allocate(screen, buf, new_domain))
++         return FALSE;
++      ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
++                                 NOUVEAU_BO_NOSYNC);
++      if (ret) {
++         /* ret is a non-zero error code; returning it from a boolean
++          * function would read as success. Undo the allocation so the
++          * buffer is not left tagged as GART with unfilled storage.
++          */
++         nouveau_bo_ref(NULL, &buf->bo);
++         if (buf->mm)
++            release_allocation(&buf->mm, buf->fence);
++         buf->domain = old_domain;
++         return FALSE;
++      }
++      memcpy(buf->bo->map, buf->data, size);
++      nouveau_bo_unmap(buf->bo);
++      /* User memory is not ours to free (see nvc0_buffer_destroy). For our
++       * own copy, clear the pointer so later code neither reuses nor frees
++       * it a second time.
++       */
++      if (!(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY)) {
++         FREE(buf->data);
++         buf->data = NULL;
++      }
++   } else
++   if (old_domain != 0 && new_domain != 0) {
++      struct nvc0_mm_allocation *mm = buf->mm;
++
++      if (new_domain == NOUVEAU_BO_VRAM) {
++         /* keep a system memory copy of our data in case we hit a fallback */
++         if (!nvc0_buffer_data_fetch(buf, buf->bo, buf->offset, size))
++            return FALSE;
++         debug_printf("migrating %u KiB to VRAM\n", size / 1024);
++      }
++
++      offset = buf->offset;
++      bo = buf->bo;
++      buf->bo = NULL;
++      buf->mm = NULL;
++      if (!nvc0_buffer_allocate(screen, buf, new_domain)) {
++         /* keep using the old storage instead of copying into a NULL bo */
++         buf->bo = bo;
++         buf->mm = mm;
++         buf->offset = offset;
++         return FALSE;
++      }
++
++      nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, new_domain,
++                            bo, offset, old_domain, buf->base.width0);
++
++      /* the copy may still be in flight: release the old sub-allocation
++       * through the current screen fence
++       */
++      nouveau_bo_ref(NULL, &bo);
++      if (mm)
++         release_allocation(&mm, screen->fence.current);
++   } else
++   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
++      if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
++         return FALSE;
++      if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
++         return FALSE;
++   } else
++      return FALSE;
++
++   assert(buf->domain == new_domain);
++   return TRUE;
++}
++
++/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
++ * We'd like to only allocate @size bytes here, but then we'd have to rebase
++ * the vertex indices ...
++ */ ++boolean ++nvc0_user_buffer_upload(struct nvc0_resource *buf, unsigned base, unsigned size) ++{ ++ struct nvc0_screen *screen = nvc0_screen(buf->base.screen); ++ int ret; ++ ++ assert(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY); ++ ++ buf->base.width0 = base + size; ++ if (!nvc0_buffer_reallocate(screen, buf, NOUVEAU_BO_GART)) ++ return FALSE; ++ ++ ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size, ++ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); ++ if (ret) ++ return FALSE; ++ memcpy(buf->bo->map, buf->data + base, size); ++ nouveau_bo_unmap(buf->bo); ++ ++ return TRUE; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c +new file mode 100644 +index 0000000..2118abb +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_context.c +@@ -0,0 +1,164 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include "draw/draw_context.h" ++#include "pipe/p_defines.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_screen.h" ++#include "nvc0_resource.h" ++ ++#include "nouveau/nouveau_reloc.h" ++ ++static void ++nvc0_flush(struct pipe_context *pipe, unsigned flags, ++ struct pipe_fence_handle **fence) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ if (flags & PIPE_FLUSH_TEXTURE_CACHE) { ++ BEGIN_RING(chan, RING_3D(SERIALIZE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); ++ OUT_RING (chan, 0x00); ++ } ++ ++ if (fence) { ++ nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); ++ } ++ ++ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { ++ FIRE_RING(chan); ++ ++ nvc0_screen_fence_next(nvc0->screen); ++ } ++} ++ ++static void ++nvc0_destroy(struct pipe_context *pipe) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ draw_destroy(nvc0->draw); ++ ++ if (nvc0->screen->cur_ctx == nvc0) ++ nvc0->screen->cur_ctx = NULL; ++ ++ FREE(nvc0); ++} ++ ++struct pipe_context * ++nvc0_create(struct pipe_screen *pscreen, void *priv) ++{ ++ struct pipe_winsys *pipe_winsys = pscreen->winsys; ++ struct nvc0_screen *screen = nvc0_screen(pscreen); ++ struct nvc0_context *nvc0; ++ ++ nvc0 = CALLOC_STRUCT(nvc0_context); ++ if (!nvc0) ++ return NULL; ++ nvc0->screen = screen; ++ ++ nvc0->pipe.winsys = pipe_winsys; ++ nvc0->pipe.screen = pscreen; ++ nvc0->pipe.priv = priv; ++ ++ nvc0->pipe.destroy = nvc0_destroy; ++ ++ nvc0->pipe.draw_vbo = nvc0_draw_vbo; ++ nvc0->pipe.clear = nvc0_clear; ++ ++ nvc0->pipe.flush = nvc0_flush; ++ ++ screen->base.channel->user_private = nvc0; ++ ++ nvc0_init_query_functions(nvc0); ++ nvc0_init_surface_functions(nvc0); ++ nvc0_init_state_functions(nvc0); ++ nvc0_init_resource_functions(&nvc0->pipe); ++ ++ nvc0->draw = draw_create(&nvc0->pipe); ++ assert(nvc0->draw); ++ draw_set_rasterize_stage(nvc0->draw, 
nvc0_draw_render_stage(nvc0)); ++ ++ return &nvc0->pipe; ++} ++ ++struct resident { ++ struct nvc0_resource *res; ++ uint32_t flags; ++}; ++ ++void ++nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, ++ struct nvc0_resource *resource, uint32_t flags) ++{ ++ struct resident rsd = { resource, flags }; ++ ++ if (!resource->bo) ++ return; ++ ++ /* We don't need to reference the resource here, it will be referenced ++ * in the context/state, and bufctx will be reset when state changes. ++ */ ++ util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); ++} ++ ++void ++nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, ++ struct nvc0_resource *resource) ++{ ++ struct resident *rsd, *top; ++ unsigned i; ++ ++ for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { ++ rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); ++ ++ if (rsd->res == resource) { ++ top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); ++ if (rsd != top) ++ *rsd = *top; ++ break; ++ } ++ } ++} ++ ++void ++nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) ++{ ++ struct resident *rsd; ++ struct util_dynarray *array; ++ unsigned ctx, i; ++ ++ for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { ++ array = &nvc0->residents[ctx]; ++ ++ for (i = 0; i < array->size / sizeof(struct resident); ++i) { ++ rsd = util_dynarray_element(array, struct resident, i); ++ ++ nvc0_resource_validate(rsd->res, rsd->flags); ++ } ++ } ++ ++ nvc0_screen_make_buffers_resident(nvc0->screen); ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h +new file mode 100644 +index 0000000..9411798 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_context.h +@@ -0,0 +1,227 @@ ++#ifndef __NVC0_CONTEXT_H__ ++#define __NVC0_CONTEXT_H__ ++ ++#include ++#include "pipe/p_context.h" ++#include "pipe/p_defines.h" ++#include "pipe/p_state.h" ++ ++#include "util/u_memory.h" ++#include "util/u_math.h" ++#include 
"util/u_inlines.h" ++#include "util/u_dynarray.h" ++ ++#include "draw/draw_vertex.h" ++ ++#include "nvc0_winsys.h" ++#include "nvc0_stateobj.h" ++#include "nvc0_screen.h" ++#include "nvc0_program.h" ++#include "nvc0_resource.h" ++ ++#include "nvc0_3ddefs.xml.h" ++#include "nvc0_3d.xml.h" ++#include "nvc0_2d.xml.h" ++#include "nvc0_m2mf.xml.h" ++ ++#define NOUVEAU_ERR(fmt, args...) \ ++ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); ++ ++#ifdef NOUVEAU_DEBUG ++# define NOUVEAU_DBG(args...) printf(args); ++#else ++# define NOUVEAU_DBG(args...) ++#endif ++ ++#define NVC0_NEW_BLEND (1 << 0) ++#define NVC0_NEW_RASTERIZER (1 << 1) ++#define NVC0_NEW_ZSA (1 << 2) ++#define NVC0_NEW_VERTPROG (1 << 3) ++#define NVC0_NEW_TCTLPROG (1 << 4) ++#define NVC0_NEW_TEVLPROG (1 << 5) ++#define NVC0_NEW_GMTYPROG (1 << 6) ++#define NVC0_NEW_FRAGPROG (1 << 7) ++#define NVC0_NEW_BLEND_COLOUR (1 << 8) ++#define NVC0_NEW_STENCIL_REF (1 << 9) ++#define NVC0_NEW_CLIP (1 << 10) ++#define NVC0_NEW_SAMPLE_MASK (1 << 11) ++#define NVC0_NEW_FRAMEBUFFER (1 << 12) ++#define NVC0_NEW_STIPPLE (1 << 13) ++#define NVC0_NEW_SCISSOR (1 << 14) ++#define NVC0_NEW_VIEWPORT (1 << 15) ++#define NVC0_NEW_ARRAYS (1 << 16) ++#define NVC0_NEW_VERTEX (1 << 17) ++#define NVC0_NEW_CONSTBUF (1 << 18) ++#define NVC0_NEW_TEXTURES (1 << 19) ++#define NVC0_NEW_SAMPLERS (1 << 20) ++ ++#define NVC0_BUFCTX_CONSTANT 0 ++#define NVC0_BUFCTX_FRAME 1 ++#define NVC0_BUFCTX_VERTEX 2 ++#define NVC0_BUFCTX_TEXTURES 3 ++#define NVC0_BUFCTX_COUNT 4 ++ ++struct nvc0_context { ++ struct pipe_context pipe; ++ ++ struct nvc0_screen *screen; ++ ++ struct util_dynarray residents[NVC0_BUFCTX_COUNT]; ++ ++ uint32_t dirty; ++ ++ struct { ++ uint32_t instance_elts; /* bitmask of per-instance elements */ ++ uint32_t instance_base; ++ int32_t index_bias; ++ boolean prim_restart; ++ uint8_t num_vtxbufs; ++ uint8_t num_vtxelts; ++ uint8_t num_textures[5]; ++ uint8_t num_samplers[5]; ++ uint16_t scissor; ++ uint32_t 
uniform_buffer_bound[5]; ++ } state; ++ ++ struct nvc0_blend_stateobj *blend; ++ struct nvc0_rasterizer_stateobj *rast; ++ struct nvc0_zsa_stateobj *zsa; ++ struct nvc0_vertex_stateobj *vertex; ++ ++ struct nvc0_program *vertprog; ++ struct nvc0_program *tctlprog; ++ struct nvc0_program *tevlprog; ++ struct nvc0_program *gmtyprog; ++ struct nvc0_program *fragprog; ++ ++ struct pipe_resource *constbuf[5][16]; ++ uint16_t constbuf_dirty[5]; ++ ++ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; ++ unsigned num_vtxbufs; ++ struct pipe_index_buffer idxbuf; ++ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ ++ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ ++ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ ++ unsigned vbo_max_index; ++ ++ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; ++ unsigned num_textures[5]; ++ struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS]; ++ unsigned num_samplers[5]; ++ ++ struct pipe_framebuffer_state framebuffer; ++ struct pipe_blend_color blend_colour; ++ struct pipe_stencil_ref stencil_ref; ++ struct pipe_poly_stipple stipple; ++ struct pipe_scissor_state scissor; ++ struct pipe_viewport_state viewport; ++ struct pipe_clip_state clip; ++ ++ unsigned sample_mask; ++ ++ boolean vbo_dirty; ++ boolean vbo_push_hint; ++ ++ struct draw_context *draw; ++}; ++ ++static INLINE struct nvc0_context * ++nvc0_context(struct pipe_context *pipe) ++{ ++ return (struct nvc0_context *)pipe; ++} ++ ++struct nvc0_surface { ++ struct pipe_surface base; ++ uint32_t offset; ++ uint32_t width; ++ uint16_t height; ++ uint16_t depth; ++}; ++ ++static INLINE struct nvc0_surface * ++nvc0_surface(struct pipe_surface *ps) ++{ ++ return (struct nvc0_surface *)ps; ++} ++ ++/* nvc0_context.c */ ++struct pipe_context *nvc0_create(struct pipe_screen *, void *); ++ ++void nvc0_bufctx_emit_relocs(struct nvc0_context *); ++void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, 
++ struct nvc0_resource *, uint32_t flags); ++void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, ++ struct nvc0_resource *); ++static INLINE void ++nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) ++{ ++ util_dynarray_resize(&nvc0->residents[ctx], 0); ++} ++ ++/* nvc0_draw.c */ ++extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); ++ ++/* nvc0_program.c */ ++boolean nvc0_program_translate(struct nvc0_program *); ++void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); ++ ++/* nvc0_query.c */ ++void nvc0_init_query_functions(struct nvc0_context *); ++ ++/* nvc0_shader_state.c */ ++void nvc0_vertprog_validate(struct nvc0_context *); ++void nvc0_tctlprog_validate(struct nvc0_context *); ++void nvc0_tevlprog_validate(struct nvc0_context *); ++void nvc0_gmtyprog_validate(struct nvc0_context *); ++void nvc0_fragprog_validate(struct nvc0_context *); ++ ++/* nvc0_state.c */ ++extern void nvc0_init_state_functions(struct nvc0_context *); ++ ++/* nvc0_state_validate.c */ ++extern boolean nvc0_state_validate(struct nvc0_context *); ++ ++/* nvc0_surface.c */ ++extern void nvc0_clear(struct pipe_context *, unsigned buffers, ++ const float *rgba, double depth, unsigned stencil); ++extern void nvc0_init_surface_functions(struct nvc0_context *); ++ ++/* nvc0_tex.c */ ++void nvc0_validate_textures(struct nvc0_context *); ++void nvc0_validate_samplers(struct nvc0_context *); ++ ++struct pipe_sampler_view * ++nvc0_create_sampler_view(struct pipe_context *, ++ struct pipe_resource *, ++ const struct pipe_sampler_view *); ++ ++/* nvc0_transfer.c */ ++void ++nvc0_m2mf_push_linear(struct nvc0_context *nvc0, ++ struct nouveau_bo *dst, unsigned domain, int offset, ++ unsigned size, void *data); ++void ++nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, ++ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, ++ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, ++ unsigned size); ++ ++/* nvc0_vbo.c */ ++void 
nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); ++ ++void * ++nvc0_vertex_state_create(struct pipe_context *pipe, ++ unsigned num_elements, ++ const struct pipe_vertex_element *elements); ++void ++nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); ++ ++void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0); ++ ++/* nvc0_push.c */ ++void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); ++void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *); ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c +new file mode 100644 +index 0000000..ac7e9f6 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_draw.c +@@ -0,0 +1,88 @@ ++/* ++ * Copyright 2008 Ben Skeggs ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */
++
++#include "draw/draw_pipe.h"
++
++#include "nvc0_context.h"
++
++struct nvc0_render_stage {
++   struct draw_stage stage; /* must stay first: nvc0_render_stage() casts to it */
++   struct nvc0_context *nvc0;
++};
++
++static INLINE struct nvc0_render_stage *
++nvc0_render_stage(struct draw_stage *stage)
++{
++   return (struct nvc0_render_stage *)stage;
++}
++
++static void
++nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
++{
++   NOUVEAU_ERR("\n"); /* stub: nothing is emitted, only NOUVEAU_ERR is invoked */
++}
++
++static void
++nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
++{
++   NOUVEAU_ERR("\n"); /* stub, see nvc0_render_point */
++}
++
++static void
++nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
++{
++   NOUVEAU_ERR("\n"); /* stub, see nvc0_render_point */
++}
++
++static void
++nvc0_render_flush(struct draw_stage *stage, unsigned flags)
++{
++}
++
++static void
++nvc0_render_reset_stipple_counter(struct draw_stage *stage)
++{
++   NOUVEAU_ERR("\n"); /* stub, see nvc0_render_point */
++}
++
++static void
++nvc0_render_destroy(struct draw_stage *stage)
++{
++   FREE(stage);
++}
++/* Allocate a draw stage wired to the stub callbacks above; NULL if out of memory. */
++struct draw_stage *
++nvc0_draw_render_stage(struct nvc0_context *nvc0)
++{
++   struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
++   if (!rs) /* allocation failed: report it instead of writing through NULL */
++      return NULL;
++   rs->nvc0 = nvc0;
++   rs->stage.draw = nvc0->draw;
++   rs->stage.destroy = nvc0_render_destroy;
++   rs->stage.point = nvc0_render_point;
++   rs->stage.line = nvc0_render_line;
++   rs->stage.tri = nvc0_render_tri;
++   rs->stage.flush = nvc0_render_flush;
++   rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
++   return &rs->stage;
++}
+diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
+new file mode 100644
+index 0000000..9d2c48c
+--- /dev/null
++++ b/src/gallium/drivers/nvc0/nvc0_fence.c
+@@ -0,0 +1,203 @@
++/*
++ * Copyright 2010 Christoph Bumiller
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge,
publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "nvc0_fence.h"
++#include "nvc0_context.h"
++#include "nvc0_screen.h"
++
++#ifdef PIPE_OS_UNIX
++#include <sched.h>
++#endif
++
++boolean
++nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
++                      boolean emit)
++{
++   *fence = CALLOC_STRUCT(nvc0_fence);
++   if (!*fence)
++      return FALSE;
++
++   (*fence)->screen = screen;
++   (*fence)->ref = 1; /* the reference handed back through *fence */
++
++   if (emit)
++      nvc0_fence_emit(*fence);
++
++   return TRUE;
++}
++
++void
++nvc0_fence_emit(struct nvc0_fence *fence)
++{
++   struct nvc0_screen *screen = fence->screen;
++   struct nouveau_channel *chan = screen->base.channel;
++
++   fence->sequence = ++screen->fence.sequence;
++
++   assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);
++
++   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
++   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
++   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
++   OUT_RING  (chan, fence->sequence);
++   OUT_RING  (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
++              (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
++
++   ++fence->ref; /* reference held by the screen's pending list below */
++
++   if (screen->fence.tail)
++      screen->fence.tail->next = fence;
++   else
++      screen->fence.head = fence;
++
++   screen->fence.tail = fence;
++
++   fence->state = NVC0_FENCE_STATE_EMITTED;
++}
++
++static void
++nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence);
++
++void
++nvc0_fence_del(struct nvc0_fence *fence)
++{
++   struct nvc0_fence *it;
++   struct nvc0_screen *screen = fence->screen;
++   /* An emitted fence is still linked on the screen's list: unlink it first. */
++   if (fence->state == NVC0_FENCE_STATE_EMITTED) {
++      if (fence == screen->fence.head) {
++         screen->fence.head = fence->next;
++         if (!screen->fence.head)
++            screen->fence.tail = NULL;
++      } else {
++         for (it = screen->fence.head; it && it->next != fence; it = it->next);
++         if (it) /* predecessor found; NULL means fence is not on the list */
++            it->next = fence->next;
++         if (it && screen->fence.tail == fence)
++            screen->fence.tail = it;
++      }
++   }
++   if (fence->buffers) {
++      debug_printf("WARNING: deleting fence with buffers "
++                   "still hooked to it !\n");
++      nvc0_fence_trigger_release_buffers(fence);
++   }
++
++   FREE(fence);
++}
++
++static void
++nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
++{
++   struct nvc0_mm_allocation *alloc = fence->buffers;
++
++   while (alloc) {
++      struct nvc0_mm_allocation *next = alloc->next; /* read before the free */
++      nvc0_mm_free(alloc);
++      alloc = next;
++   }
++   fence->buffers = NULL;
++}
++
++static void
++nvc0_screen_fence_update(struct nvc0_screen *screen)
++{
++   struct nvc0_fence *fence;
++   struct nvc0_fence *next = NULL;
++   uint32_t sequence = screen->fence.map[0]; /* presumably written back by the GPU - confirm */
++
++   if (screen->fence.sequence_ack == sequence)
++      return;
++   screen->fence.sequence_ack = sequence;
++
++   for (fence = screen->fence.head; fence; fence = next) {
++      next = fence->next;
++      sequence = fence->sequence; /* saved: the unreference below may free fence */
++
++      fence->state = NVC0_FENCE_STATE_SIGNALLED;
++
++      if (fence->buffers)
++         nvc0_fence_trigger_release_buffers(fence);
++
++      nvc0_fence_reference(&fence, NULL);
++
++      if (sequence == screen->fence.sequence_ack)
++         break;
++   }
++   screen->fence.head = next;
++   if (!next)
++      screen->fence.tail = NULL;
++}
++
++#define NVC0_FENCE_MAX_SPINS (1 << 17)
++
++boolean
++nvc0_fence_signalled(struct nvc0_fence *fence)
++{
++   struct nvc0_screen *screen = fence->screen;
++
++   if (fence->state == NVC0_FENCE_STATE_EMITTED)
++      nvc0_screen_fence_update(screen);
++
++   return fence->state == NVC0_FENCE_STATE_SIGNALLED;
++}
++
++boolean
++nvc0_fence_wait(struct nvc0_fence *fence)
++{
++   struct nvc0_screen *screen = fence->screen;
++   int spins = 0;
++
++   if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
++      nvc0_fence_emit(fence);
++
++      FIRE_RING(screen->base.channel);
++
++      if (fence == screen->fence.current)
++         nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
++   }
++
++   do {
++      nvc0_screen_fence_update(screen);
++
++      if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
++         return TRUE;
++      spins++;
++#ifdef PIPE_OS_UNIX
++      if (!(spins % 8)) /* donate a few cycles */
++         sched_yield();
++#endif
++   } while (spins < NVC0_FENCE_MAX_SPINS);
++
++   if (spins > 9000) /* always true here: the loop only falls out at MAX_SPINS */
++      NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
++
++   return FALSE;
++}
++
++void
++nvc0_screen_fence_next(struct nvc0_screen *screen)
++{
++   nvc0_fence_emit(screen->fence.current);
++   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
++   nvc0_screen_fence_update(screen);
++}
+diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
+new file mode 100644
+index 0000000..e63c164
+--- /dev/null
++++ b/src/gallium/drivers/nvc0/nvc0_fence.h
+@@ -0,0 +1,48 @@
++
++#ifndef __NVC0_FENCE_H__
++#define __NVC0_FENCE_H__
++
++#include "util/u_inlines.h"
++#include "util/u_double_list.h"
++
++#define NVC0_FENCE_STATE_AVAILABLE 0
++#define NVC0_FENCE_STATE_EMITTED   1
++#define NVC0_FENCE_STATE_SIGNALLED 2
++
++struct nvc0_mm_allocation;
++
++struct nvc0_fence {
++   struct nvc0_fence *next;
++   struct nvc0_screen *screen;
++   int state; /* one of NVC0_FENCE_STATE_* */
++   int ref;
++   uint32_t sequence;
++   struct nvc0_mm_allocation *buffers;
++};
++
++void nvc0_fence_emit(struct nvc0_fence *);
++void nvc0_fence_del(struct nvc0_fence *);
++
++boolean nvc0_fence_wait(struct nvc0_fence *);
++boolean nvc0_fence_signalled(struct nvc0_fence *);
++
++static INLINE void
++nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
++{
++   if (fence) /* take the new reference first so *ref == fence cannot free it */
++      ++fence->ref;
++   if (*ref) {
++      if (--(*ref)->ref == 0)
++         nvc0_fence_del(*ref);
++   }
++
++   *ref = fence;
++}
++
++static INLINE struct nvc0_fence *
++nvc0_fence(struct pipe_fence_handle *fence)
++{
++   return (struct nvc0_fence *)fence;
++}
++
++#endif // __NVC0_FENCE_H__
+diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
+new file mode 100644
+index 0000000..5d02357
+--- /dev/null
++++ b/src/gallium/drivers/nvc0/nvc0_formats.c
+@@ -0,0 +1,462 @@
++/*
++ * Copyright 2010 Christoph Bumiller
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */ ++ ++#include "nvc0_screen.h" ++#include "nv50_texture.xml.h" ++#include "nvc0_3d.xml.h" ++#include "nv50_defs.xml.h" ++#include "nv50_texture.xml.h" ++#include "pipe/p_defines.h" ++ ++#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ ++ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ ++ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ ++ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ ++ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ ++ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ ++ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ ++ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ ++ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ ++ NV50_TIC_0_FMT_##sz, \ ++ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \ ++ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \ ++ (r << 31) ++ ++#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \ ++ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \ ++ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ ++ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \ ++ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ ++ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \ ++ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ ++ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \ ++ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ ++ NV50_TIC_0_FMT_##sz, 0 ++ ++#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER ++#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW ++#define RENDER_TARGET PIPE_BIND_RENDER_TARGET ++#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL ++#define SCANOUT PIPE_BIND_SCANOUT ++ ++/* for vertex buffers: */ ++#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 ++#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 ++#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32 ++ ++const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = ++{ ++ /* COMMON FORMATS */ ++ ++ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM, ++ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), ++ 
VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, ++ ++ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM, ++ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, ++ ++ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB, ++ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB, ++ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM, ++ B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1), ++ SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, ++ ++ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM, ++ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), ++ SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, ++ ++ [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, ++ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, ++ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), ++ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, ++ ++ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM, ++ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1), ++ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER }, ++ ++ /* DEPTH/STENCIL FORMATS */ ++ ++ [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM, ++ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM, ++ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM, ++ B_(C0, C0, C0, 
ONE, UNORM, UINT, UINT, UINT, 8_24, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM, ++ B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, ++ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = { ++ NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM, ++ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0), ++ SAMPLER_VIEW | DEPTH_STENCIL }, ++ ++ /* LUMINANCE, ALPHA, INTENSITY */ ++ ++ [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, ++ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, ++ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, ++ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, ++ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), ++ SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, ++ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_L8A8_SRGB] = { 0, ++ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), ++ SAMPLER_VIEW }, ++ ++ /* DXT, RGTC */ ++ ++ [PIPE_FORMAT_DXT1_RGB] = { 0, ++ B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_DXT1_RGBA] = { 0, ++ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_DXT3_RGBA] = { 0, ++ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_DXT5_RGBA] = { 0, ++ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_RGTC1_UNORM] = { 0, ++ B_(C0, 
ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_RGTC1_SNORM] = { 0, ++ B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_RGTC2_UNORM] = { 0, ++ B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0), ++ SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_RGTC2_SNORM] = { 0, ++ B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0), ++ SAMPLER_VIEW }, ++ ++ /* FLOAT 16 */ ++ ++ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT, ++ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT, ++ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT, ++ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, ++ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ /* FLOAT 32 */ ++ ++ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT, ++ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT, ++ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT, ++ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, ++ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | 
RENDER_TARGET }, ++ ++ /* ODD FORMATS */ ++ ++ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT, ++ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0), ++ SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0, ++ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0), ++ SAMPLER_VIEW }, ++ ++ /* SNORM 32 */ ++ ++ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, ++ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32B32_SNORM] = { 0, ++ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32_SNORM] = { 0, ++ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32_SNORM] = { 0, ++ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* UNORM 32 */ ++ ++ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, ++ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32B32_UNORM] = { 0, ++ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32_UNORM] = { 0, ++ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32_UNORM] = { 0, ++ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* SNORM 16 */ ++ ++ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM, ++ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16G16B16_SNORM] = { 0, ++ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, ++ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, 
SNORM, 16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, ++ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ /* UNORM 16 */ ++ ++ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM, ++ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16G16B16_UNORM] = { 0, ++ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, ++ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, ++ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ /* SNORM 8 */ ++ ++ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM, ++ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8G8B8_SNORM] = { 0, ++ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM, ++ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM, ++ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ /* UNORM 8 */ ++ ++ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM, ++ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB, ++ A_(C0, C1, C2, C3, UNORM, UNORM, 
UNORM, UNORM, 8_8_8_8, 0), ++ SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM, ++ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB, ++ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0), ++ SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM, ++ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, ++ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, ++ ++ /* SSCALED 32 */ ++ ++ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT, ++ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32B32_SSCALED] = { 0, ++ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT, ++ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32_SSCALED] = { 0, ++ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* USCALED 32 */ ++ ++ [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT, ++ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32B32_USCALED] = { 0, ++ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT, ++ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0), ++ VERTEX_BUFFER | 
SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R32_USCALED] = { 0, ++ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* SSCALED 16 */ ++ ++ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT, ++ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16B16_SSCALED] = { 0, ++ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT, ++ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT, ++ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* USCALED 16 */ ++ ++ [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT, ++ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16B16_USCALED] = { 0, ++ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT, ++ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT, ++ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* SSCALED 8 */ ++ ++ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT, ++ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8G8B8_SSCALED] = { 0, ++ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8G8_SSCALED] = { 
NV50_SURFACE_FORMAT_R8G8_SINT, ++ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT, ++ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ /* USCALED 8 */ ++ ++ [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT, ++ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8G8B8_USCALED] = { 0, ++ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT, ++ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++ ++ [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT, ++ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0), ++ VERTEX_BUFFER | SAMPLER_VIEW }, ++}; +diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h +new file mode 100644 +index 0000000..8da963a +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h +@@ -0,0 +1,235 @@ ++ ++#ifndef __NVC0_PGRAPH_MACROS_H__ ++#define __NVC0_PGRAPH_MACROS_H__ ++ ++/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1 ++ * with bits [src:src+size) in r2 ++ * ++ * bra(n)z annul: no delay slot ++ */ ++ ++/* The comments above the macros describe what they *should* be doing, ++ * but we use less functionality for now. 
++ */ ++ ++/* ++ * for (i = 0; i < 8; ++i) ++ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg); ++ * ++ * [3428] = arg; ++ * ++ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0) ++ * [0d9c] = 0; ++ * else ++ * [0d9c] = [342c]; ++ */ ++static const uint32_t nvc0_9097_blend_enables[] = ++{ ++ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */ ++ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */ ++ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */ ++ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */ ++ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */ ++ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */ ++ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */ ++ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */ ++ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */ ++}; ++ ++/* ++ * uint64 limit = (parm(0) << 32) | parm(1); ++ * uint64 start = (parm(2) << 32); ++ * ++ * if (limit) { ++ * start |= parm(3); ++ * --limit; ++ * } else { ++ * start |= 1; ++ * } ++ * ++ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff; ++ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff; ++ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff; ++ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff; ++ */ ++static const uint32_t nvc0_9097_vertex_array_select[] = ++{ ++ 0x00000201, /* 0x00: parm $r2 */ ++ 0x00000301, /* 0x01: parm $r3 */ ++ 0x00000401, /* 0x02: parm $r4 */ ++ 0x00000501, /* 0x03: parm $r5 */ ++ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */ ++ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */ ++ 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */ ++ 0x00002041, /* 0x07: send $r4 */ ++ 0x00002841, /* 0x08: send $r5 */ ++ 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */ ++ 0x000010c1, /* 0x0a: exit send $r2 */ ++ 0x00001841, /* 0x0b: send $r3 */ ++}; ++ ++static const uint32_t nvc0_9097_color_mask_brdc[] = ++{ ++ 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */ ++ 0x00000841, /* send 
$r1 */
++   0x00000841, /* send $r1 */
++   0x00000841, /* send $r1 */
++   0x00000841, /* send $r1 */
++   0x00000841, /* send $r1 */
++   0x00000841, /* send $r1 */
++   0x000008c1, /* exit send $r1 */
++   0x00000841, /* send $r1 */
++};
++
++/*
++ * [GL_POLYGON_MODE_FRONT] = arg;
++ *
++ * if (BIT(31 of [0x3410]))
++ *    [1a24] = 0x7353;
++ *
++ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
++ *    [02ec] = 0;
++ * else
++ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
++ *    [02ec] = BYTE(1 of [0x3410]) << 4;
++ * else
++ *    [02ec] = BYTE(0 of [0x3410]) << 4;
++ */
++static const uint32_t nvc0_9097_poly_mode_front[] =
++{
++   0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
++   0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
++   0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
++   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
++   0x00004211, /* 0x04: mov $r2 0x1 */
++   0x00180611, /* 0x05: mov $r6 0x60 */
++   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x07: braz $r7 0xa */
++   0x00dac021, /* 0x08: maddr 0x36b [NVC0_3D_POLYGON_MODE_FRONT] */
++   0x00800611, /* 0x09: mov $r6 0x200 */
++   0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
++   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x0c: braz $r7 0xf */
++   0x00000841, /* 0x0d: send $r1 */
++   0x00000611, /* 0x0e: mov $r6 0 */
++   0x002ec0a1, /* 0x0f: exit maddr [02ec] */
++   0x00003041 /* 0x10: send $r6 */
++};
++
++/*
++ * [GL_POLYGON_MODE_BACK] = arg;
++ *
++ * if (BIT(31 of [0x3410]))
++ *    [1a24] = 0x7353;
++ *
++ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
++ *    [02ec] = 0;
++ * else
++ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
++ *    [02ec] = BYTE(1 of [0x3410]) << 4;
++ * else
++ *    [02ec] = BYTE(0 of [0x3410]) << 4;
++ */
++/* NOTE: 0x3410 = 0x80002006 by default,
++ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
++ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
++ */
++static const uint32_t nvc0_9097_poly_mode_back[] =
++{
++   0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
++   0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
++   0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
++   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
++   0x00004211, /* 0x04: mov $r2 0x1 */
++   0x00180611, /* 0x05: mov $r6 0x60 */
++   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x07: braz $r7 0xa */
++   0x00db0021, /* 0x08: maddr 0x36c [NVC0_3D_POLYGON_MODE_BACK], target of the send at 0x0d */
++   0x00800611, /* 0x09: mov $r6 0x200 */
++   0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
++   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x0c: braz $r7 0xf */
++   0x00000841, /* 0x0d: send $r1 */
++   0x00000611, /* 0x0e: mov $r6 0 */
++   0x002ec0a1, /* 0x0f: exit maddr [02ec] */
++   0x00003041 /* 0x10: send $r6 */
++};
++
++/*
++ * [NVC0_3D_SP_SELECT(4)] = arg
++ *
++ * if BIT(31 of [0x3410]) == 0
++ *    [1a24] = 0x7353;
++ *
++ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
++ *    [02ec] = 0
++ * else
++ * if (any POLYGON MODE == LINE)
++ *    [02ec] = BYTE(1 of [3410]) << 4;
++ * else
++ *    [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
++ */
++static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
++{
++   0x00dac215, /* 0x00: read $r2 0x36b */
++   0x00db0315, /* 0x01: read $r3 0x36c */
++   0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
++   0x020c0415, /* 0x03: read $r4 0x830 */
++   0x00004211, /* 0x04: mov $r2 0x1 */
++   0x00180611, /* 0x05: mov $r6 0x60 */
++   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x07: braz $r7 0xa */
++   0x02100021, /* 0x08: maddr 0x840 */
++   0x00800611, /* 0x09: mov $r6 0x200 */
++   0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
++   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
++   0x0000f807, /* 0x0c: braz $r7 0xf */
++   0x00000841, /* 0x0d: send $r1 */
++   0x00000611, /* 0x0e: mov $r6 0 */
++   0x002ec0a1, /* 0x0f: exit maddr 0xbb */
++   0x00003041, /* 0x10: send $r6 */
++};
++
++/*
++ * [NVC0_3D_SP_SELECT(3)] = arg
++ *
++ * if BIT(31 of [0x3410]) == 0
++ *
/* Macro code words for the "tep_select" macro.  The comment on each word
 * gives its index and disassembly.  The 0x10 after the array name is
 * presumably the macro slot/method index -- TODO confirm against the upload
 * code.
 */
static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
{
   0x00dac215, /* 0x00: read $r2 0x36b */
   0x00db0315, /* 0x01: read $r3 0x36c */
   0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
   0x02100415, /* 0x03: read $r4 0x840 */
   0x00004211, /* 0x04: mov $r2 0x1 */
   0x00180611, /* 0x05: mov $r6 0x60 */
   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
   0x0000f807, /* 0x07: braz $r7 0xa */
   0x020c0021, /* 0x08: maddr 0x830 */
   0x00800611, /* 0x09: mov $r6 0x200 */
   0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
   0x0000f807, /* 0x0c: braz $r7 0xf */
   0x00000841, /* 0x0d: send $r1 */
   0x00000611, /* 0x0e: mov $r6 0 */
   0x002ec0a1, /* 0x0f: exit maddr 0xbb */
   0x00003041, /* 0x10: send $r6 */
};
++ ++This file was generated by the rules-ng-ng headergen tool in this git repository: ++http://0x04.net/cgit/index.cgi/rules-ng-ng ++git clone git://0x04.net/rules-ng-ng ++ ++The rules-ng-ng source files this header was generated from are: ++- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29) ++- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) ++- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) ++- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) ++- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) ++ ++Copyright (C) 2006-2010 by the following authors: ++- Artur Huillet (ahuillet) ++- Ben Skeggs (darktama, darktama_) ++- B. R. (koala_br) ++- Carlos Martin (carlosmn) ++- Christoph Bumiller (calim, chrisbmr) ++- Dawid Gajownik (gajownik) ++- Dmitry Baryshkov ++- Dmitry Eremin-Solenikov (lumag) ++- EdB (edb_) ++- Erik Waling (erikwaling) ++- Francisco Jerez (curro, curro_, currojerez) ++- imirkin (imirkin) ++- jb17bsome (jb17bsome) ++- Jeremy Kolb (kjeremy) ++- Laurent Carlier (lordheavy) ++- Luca Barbieri (lb, lb1) ++- Maarten Maathuis (stillunknown) ++- Marcin Kościelnicki (mwk, koriakin) ++- Mark Carey (careym) ++- Matthieu Castet (mat-c) ++- nvidiaman (nvidiaman) ++- Patrice Mandin (pmandin, pmdata) ++- Pekka Paalanen (pq, ppaalanen) ++- Peter Popov (ironpeter) ++- Richard Hughes (hughsient) ++- Rudi Cilibrasi (cilibrar) ++- Serge Martin ++- Simon Raffeiner ++- Stephane Loeuillet (leroutier) ++- Stephane Marchesin (marcheu) ++- sturmflut (sturmflut) ++- Sylvain Munaut ++- Victor Stinner (haypo) ++- Wladmir van der Laan (miathan6) ++- Younes Manton (ymanton) ++ ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++"Software"), to deal in the Software without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software 
is furnished to do so, subject to ++the following conditions: ++ ++The above copyright notice and this permission notice (including the ++next paragraph) shall be included in all copies or substantial ++portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE ++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION ++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION ++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++*/ ++ ++ ++ ++#define NVC0_M2MF_TILING_MODE_IN 0x00000204 ++ ++#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 ++ ++#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c ++ ++#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 ++ ++#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 ++ ++#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 ++ ++#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 ++ ++#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 ++ ++#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c ++ ++#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 ++ ++#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 ++ ++#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c ++ ++#define NVC0_M2MF_EXEC 0x00000300 ++#define NVC0_M2MF_EXEC_PUSH 0x00000001 ++#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010 ++#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100 ++#define NVC0_M2MF_EXEC_NOTIFY 0x00002000 ++#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000 ++#define NVC0_M2MF_EXEC_INC__SHIFT 20 ++ ++#define NVC0_M2MF_DATA 0x00000304 ++ ++#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c ++ ++#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 ++ ++#define NVC0_M2MF_PITCH_IN 0x00000314 ++ ++#define NVC0_M2MF_PITCH_OUT 0x00000318 ++ ++#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c ++ ++#define NVC0_M2MF_LINE_COUNT 0x00000320 ++ ++#define 
NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c ++ ++#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 ++ ++#define NVC0_M2MF_NOTIFY 0x00000334 ++ ++#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 ++ ++#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 ++ ++#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c ++ ++#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 ++ ++ ++#endif /* NVC0_M2MF_XML */ +diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c +new file mode 100644 +index 0000000..7c7e134 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_miptree.c +@@ -0,0 +1,327 @@ ++/* ++ * Copyright 2008 Ben Skeggs ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
/* Pick the tile mode word for a surface of nx * ny * nz blocks.
 *
 * Bits 4..7 select the tile height from ny, bits 8..11 the tile depth from
 * nz.  nx is accepted for symmetry but does not influence the result.
 * A surface with exactly one layer (nz == 1) gets no depth bits at all;
 * otherwise the height field is capped at 0x020 before a depth is chosen.
 */
static INLINE uint32_t
get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
{
   uint32_t height_bits;
   uint32_t depth_bits;

   (void)nx;

   if (ny > 64)
      height_bits = 0x040; /* height 128 tiles */
   else if (ny > 32)
      height_bits = 0x030; /* height 64 tiles */
   else if (ny > 16)
      height_bits = 0x020; /* height 32 tiles */
   else if (ny > 8)
      height_bits = 0x010; /* height 16 tiles */
   else
      height_bits = 0x000;

   /* Non-layered surface: only the height field is used. */
   if (nz == 1)
      return height_bits;

   /* Layered surfaces never use more than the "height 32" setting. */
   if (height_bits > 0x020)
      height_bits = 0x020;

   if (nz > 16 && height_bits < 0x020)
      depth_bits = 0x500; /* depth 32 tiles */
   else if (nz > 8)
      depth_bits = 0x400; /* depth 16 tiles */
   else if (nz > 4)
      depth_bits = 0x300; /* depth 8 tiles */
   else if (nz > 2)
      depth_bits = 0x200; /* depth 4 tiles */
   else
      depth_bits = 0x100;

   return height_bits | depth_bits;
}
unsigned stride; ++ ++ if (!mt || !mt->base.bo) ++ return FALSE; ++ ++ stride = util_format_get_stride(mt->base.base.format, ++ mt->base.base.width0); ++ ++ return nouveau_screen_bo_get_handle(pscreen, ++ mt->base.bo, ++ stride, ++ whandle); ++} ++ ++const struct u_resource_vtbl nvc0_miptree_vtbl = ++{ ++ nvc0_miptree_get_handle, /* get_handle */ ++ nvc0_miptree_destroy, /* resource_destroy */ ++ NULL, /* is_resource_referenced */ ++ nvc0_miptree_transfer_new, /* get_transfer */ ++ nvc0_miptree_transfer_del, /* transfer_destroy */ ++ nvc0_miptree_transfer_map, /* transfer_map */ ++ u_default_transfer_flush_region, /* transfer_flush_region */ ++ nvc0_miptree_transfer_unmap, /* transfer_unmap */ ++ u_default_transfer_inline_write /* transfer_inline_write */ ++}; ++ ++struct pipe_resource * ++nvc0_miptree_create(struct pipe_screen *pscreen, ++ const struct pipe_resource *templ) ++{ ++ struct nouveau_device *dev = nouveau_screen(pscreen)->device; ++ struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); ++ struct pipe_resource *pt = &mt->base.base; ++ int ret; ++ unsigned w, h, d, l, alloc_size; ++ uint32_t tile_flags; ++ ++ if (!mt) ++ return NULL; ++ ++ mt->base.vtbl = &nvc0_miptree_vtbl; ++ *pt = *templ; ++ pipe_reference_init(&pt->reference, 1); ++ pt->screen = pscreen; ++ ++ mt->layout_3d = pt->target == PIPE_TEXTURE_3D; ++ ++ w = pt->width0; ++ h = pt->height0; ++ d = mt->layout_3d ? pt->depth0 : 1; ++ ++ switch (pt->format) { ++ case PIPE_FORMAT_Z16_UNORM: ++ tile_flags = 0x0700; /* COMPRESSED */ ++ tile_flags = 0x0200; /* NORMAL ? */ ++ tile_flags = 0x0100; /* NORMAL ? 
*/ ++ break; ++ case PIPE_FORMAT_S8_USCALED_Z24_UNORM: ++ tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ ++ tile_flags = 0x4600; /* NORMAL */ ++ break; ++ case PIPE_FORMAT_Z24X8_UNORM: ++ case PIPE_FORMAT_Z24_UNORM_S8_USCALED: ++ tile_flags = 0x1100; /* NORMAL */ ++ if (w * h >= 128 * 128 && 0) ++ tile_flags = 0x1700; /* COMPRESSED, requires magic */ ++ break; ++ case PIPE_FORMAT_R32G32B32A32_FLOAT: ++ tile_flags = 0xf500; /* COMPRESSED */ ++ tile_flags = 0xf700; /* MSAA 2 */ ++ tile_flags = 0xf900; /* MSAA 4 */ ++ tile_flags = 0xfe00; /* NORMAL */ ++ break; ++ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: ++ tile_flags = 0xce00; /* COMPRESSED */ ++ tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */ ++ tile_flags = 0xd000; /* MSAA 4, COMPRESSED */ ++ tile_flags = 0xc300; /* NORMAL */ ++ break; ++ case PIPE_FORMAT_R16G16B16A16_UNORM: ++ tile_flags = 0xe900; /* COMPRESSED */ ++ break; ++ default: ++ tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ ++ tile_flags = 0xfe00; /* NORMAL 32 BIT */ ++ if (w * h >= 128 * 128 && 0) ++ tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */ ++ break; ++ } ++ ++ /* For 3D textures, a mipmap is spanned by all the layers, for array ++ * textures and cube maps, each layer contains its own mipmaps. 
++ */ ++ for (l = 0; l <= pt->last_level; ++l) { ++ struct nvc0_miptree_level *lvl = &mt->level[l]; ++ unsigned nbx = util_format_get_nblocksx(pt->format, w); ++ unsigned nby = util_format_get_nblocksy(pt->format, h); ++ unsigned blocksize = util_format_get_blocksize(pt->format); ++ ++ lvl->offset = mt->total_size; ++ lvl->tile_mode = get_tile_dims(nbx, nby, d); ++ lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode)); ++ ++ mt->total_size += lvl->pitch * ++ align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) * ++ align(d, NVC0_TILE_DEPTH(lvl->tile_mode)); ++ ++ w = u_minify(w, 1); ++ h = u_minify(h, 1); ++ d = u_minify(d, 1); ++ } ++ ++ if (pt->array_size > 1) { ++ mt->layer_stride = align(mt->total_size, ++ NVC0_TILE_SIZE(mt->level[0].tile_mode)); ++ mt->total_size = mt->layer_stride * pt->array_size; ++ } ++ ++ alloc_size = mt->total_size; ++ if (tile_flags == 0x1700) ++ alloc_size *= 3; /* HiZ, XXX: correct size */ ++ ++ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size, ++ mt->level[0].tile_mode, tile_flags, ++ &mt->base.bo); ++ if (ret) { ++ FREE(mt); ++ return NULL; ++ } ++ mt->base.domain = NOUVEAU_BO_VRAM; ++ ++ return pt; ++} ++ ++struct pipe_resource * ++nvc0_miptree_from_handle(struct pipe_screen *pscreen, ++ const struct pipe_resource *templ, ++ struct winsys_handle *whandle) ++{ ++ struct nvc0_miptree *mt; ++ unsigned stride; ++ ++ /* only supports 2D, non-mipmapped textures for the moment */ ++ if ((templ->target != PIPE_TEXTURE_2D && ++ templ->target != PIPE_TEXTURE_RECT) || ++ templ->last_level != 0 || ++ templ->depth0 != 1 || ++ templ->array_size > 1) ++ return NULL; ++ ++ mt = CALLOC_STRUCT(nvc0_miptree); ++ if (!mt) ++ return NULL; ++ ++ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); ++ if (mt->base.bo == NULL) { ++ FREE(mt); ++ return NULL; ++ } ++ ++ mt->base.base = *templ; ++ mt->base.vtbl = &nvc0_miptree_vtbl; ++ pipe_reference_init(&mt->base.base.reference, 1); ++ mt->base.base.screen = 
pscreen; ++ mt->level[0].pitch = stride; ++ mt->level[0].offset = 0; ++ mt->level[0].tile_mode = mt->base.bo->tile_mode; ++ ++ /* no need to adjust bo reference count */ ++ return &mt->base.base; ++} ++ ++ ++/* Surface functions. ++ */ ++ ++struct pipe_surface * ++nvc0_miptree_surface_new(struct pipe_context *pipe, ++ struct pipe_resource *pt, ++ const struct pipe_surface *templ) ++{ ++ struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */ ++ struct nvc0_surface *ns; ++ struct pipe_surface *ps; ++ struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level]; ++ ++ ns = CALLOC_STRUCT(nvc0_surface); ++ if (!ns) ++ return NULL; ++ ps = &ns->base; ++ ++ pipe_reference_init(&ps->reference, 1); ++ pipe_resource_reference(&ps->texture, pt); ++ ps->context = pipe; ++ ps->format = pt->format; ++ ps->usage = templ->usage; ++ ps->u.tex.level = templ->u.tex.level; ++ ps->u.tex.first_layer = templ->u.tex.first_layer; ++ ps->u.tex.last_layer = templ->u.tex.last_layer; ++ ++ ns->width = u_minify(pt->width0, ps->u.tex.level); ++ ns->height = u_minify(pt->height0, ps->u.tex.level); ++ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; ++ ns->offset = lvl->offset; ++ ++ /* comment says there are going to be removed, but they're used by the st */ ++ ps->width = ns->width; ++ ps->height = ns->height; ++ ++ if (mt->layout_3d) { ++ unsigned zslice = ps->u.tex.first_layer; ++ ++ /* TODO: re-layout the texture to use only depth 1 tiles in this case: */ ++ if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1))) ++ NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n", ++ zslice, ps->u.tex.last_layer); ++ ++ ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, ++ util_format_get_nblocksy(pt->format, ++ ns->height)); ++ } else { ++ ns->offset += mt->layer_stride * ps->u.tex.first_layer; ++ } ++ ++ return ps; ++} ++ ++void ++nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) ++{ ++ struct nvc0_surface 
/* Return the smallest order s such that (1 << s) >= size, i.e. ceil(log2).
 *
 * Sizes 0 and 1 both yield 0: __builtin_clz() is undefined for a zero
 * argument, so that case is handled before calling it.  The comparison uses
 * an unsigned shift because s can be 31, where a signed "1 << s" would
 * overflow.
 */
static INLINE int
mm_get_order(uint32_t size)
{
   int s;

   if (size <= 1)
      return 0;

   /* index of the highest set bit */
   s = __builtin_clz(size) ^ 31;

   /* round up when size is not an exact power of two */
   if (size > ((uint32_t)1 << s))
      s += 1;
   return s;
}
MM_MIN_ORDER]; ++} ++ ++static struct mm_bucket * ++mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) ++{ ++ return mm_bucket_by_order(cache, mm_get_order(size)); ++} ++ ++/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ ++static INLINE uint32_t ++mm_default_slab_size(unsigned chunk_order) ++{ ++ assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); ++ ++ static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = ++ { ++ 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 ++ }; ++ ++ return 1 << slab_order[chunk_order - MM_MIN_ORDER]; ++} ++ ++static int ++mm_slab_new(struct nvc0_mman *cache, int chunk_order) ++{ ++ struct mm_slab *slab; ++ int words, ret; ++ const uint32_t size = mm_default_slab_size(chunk_order); ++ ++ words = ((size >> chunk_order) + 31) / 32; ++ assert(words); ++ ++ slab = MALLOC(sizeof(struct mm_slab) + words * 4); ++ if (!slab) ++ return PIPE_ERROR_OUT_OF_MEMORY; ++ ++ memset(&slab->bits[0], ~0, words * 4); ++ ++ slab->bo = NULL; ++ ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, ++ 0, cache->storage_type, &slab->bo); ++ if (ret) { ++ FREE(slab); ++ return PIPE_ERROR_OUT_OF_MEMORY; ++ } ++ ++ LIST_INITHEAD(&slab->head); ++ ++ slab->cache = cache; ++ slab->order = chunk_order; ++ slab->count = slab->free = size >> chunk_order; ++ ++ LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); ++ ++ cache->allocated += size; ++ ++ debug_printf("MM: new slab, total memory = %lu KiB\n", ++ cache->allocated / 1024); ++ ++ return PIPE_OK; ++} ++ ++/* @return token to identify slab or NULL if we just allocated a new bo */ ++struct nvc0_mm_allocation * ++nvc0_mm_allocate(struct nvc0_mman *cache, ++ uint32_t size, struct nouveau_bo **bo, uint32_t *offset) ++{ ++ struct mm_bucket *bucket; ++ struct mm_slab *slab; ++ struct nvc0_mm_allocation *alloc; ++ int ret; ++ ++ bucket = mm_bucket_by_size(cache, size); ++ if (!bucket) { ++ ret = nouveau_bo_new_tile(cache->dev, 
cache->domain, 0, size, ++ 0, cache->storage_type, bo); ++ if (ret) ++ debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); ++ ++ *offset = 0; ++ return NULL; ++ } ++ ++ if (!LIST_IS_EMPTY(&bucket->used)) { ++ slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head); ++ } else { ++ if (LIST_IS_EMPTY(&bucket->free)) { ++ mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); ++ } ++ slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); ++ ++ LIST_DEL(&slab->head); ++ LIST_ADD(&slab->head, &bucket->used); ++ } ++ ++ *offset = mm_slab_alloc(slab) << slab->order; ++ ++ alloc = MALLOC_STRUCT(nvc0_mm_allocation); ++ if (!alloc) ++ return NULL; ++ ++ nouveau_bo_ref(slab->bo, bo); ++ ++ if (slab->free == 0) { ++ LIST_DEL(&slab->head); ++ LIST_ADD(&slab->head, &bucket->full); ++ } ++ ++ alloc->next = NULL; ++ alloc->offset = *offset; ++ alloc->priv = (void *)slab; ++ ++ return alloc; ++} ++ ++void ++nvc0_mm_free(struct nvc0_mm_allocation *alloc) ++{ ++ struct mm_slab *slab = (struct mm_slab *)alloc->priv; ++ struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); ++ ++ mm_slab_free(slab, alloc->offset >> slab->order); ++ ++ if (slab->free == 1) { ++ LIST_DEL(&slab->head); ++ ++ if (slab->count > 1) ++ LIST_ADDTAIL(&slab->head, &bucket->used); ++ else ++ LIST_ADDTAIL(&slab->head, &bucket->free); ++ } ++ ++ FREE(alloc); ++} ++ ++struct nvc0_mman * ++nvc0_mm_create(struct nouveau_device *dev, uint32_t domain, ++ uint32_t storage_type) ++{ ++ struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman); ++ int i; ++ ++ if (!cache) ++ return NULL; ++ ++ cache->dev = dev; ++ cache->domain = domain; ++ cache->storage_type = storage_type; ++ cache->allocated = 0; ++ ++ for (i = 0; i < MM_NUM_BUCKETS; ++i) { ++ LIST_INITHEAD(&cache->bucket[i].free); ++ LIST_INITHEAD(&cache->bucket[i].used); ++ LIST_INITHEAD(&cache->bucket[i].full); ++ } ++ ++ return cache; ++} ++ ++static INLINE void ++nvc0_mm_free_slabs(struct list_head *head) ++{ ++ struct 
mm_slab *slab, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) { ++ LIST_DEL(&slab->head); ++ nouveau_bo_ref(NULL, &slab->bo); ++ FREE(slab); ++ } ++} ++ ++void ++nvc0_mm_destroy(struct nvc0_mman *cache) ++{ ++ int i; ++ ++ for (i = 0; i < MM_NUM_BUCKETS; ++i) { ++ if (!LIST_IS_EMPTY(&cache->bucket[i].used) || ++ !LIST_IS_EMPTY(&cache->bucket[i].full)) ++ debug_printf("WARNING: destroying GPU memory cache " ++ "with some buffers still in use\n"); ++ ++ nvc0_mm_free_slabs(&cache->bucket[i].free); ++ nvc0_mm_free_slabs(&cache->bucket[i].used); ++ nvc0_mm_free_slabs(&cache->bucket[i].full); ++ } ++} ++ +diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c +new file mode 100644 +index 0000000..304a191 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc.c +@@ -0,0 +1,693 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#define NOUVEAU_DEBUG 1 ++ ++#include "nvc0_pc.h" ++#include "nvc0_program.h" ++ ++boolean ++nvc0_insn_can_load(struct nv_instruction *nvi, int s, ++ struct nv_instruction *ld) ++{ ++ int i; ++ ++ if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) { ++ if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) ++ return FALSE; ++ if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) ++ if (ld->src[0]->value->reg.imm.u32 & 0xfff) ++ return FALSE; ++ } else ++ if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) ++ return FALSE; ++ ++ if (ld->indirect >= 0) ++ return FALSE; ++ ++ for (i = 0; i < 3 && nvi->src[i]; ++i) ++ if (nvi->src[i]->value->reg.file == NV_FILE_IMM) ++ return FALSE; ++ ++ return TRUE; ++} ++ ++/* Return whether this instruction can be executed conditionally. */ ++boolean ++nvc0_insn_is_predicateable(struct nv_instruction *nvi) ++{ ++ int s; ++ ++ if (!nv_op_predicateable(nvi->opcode)) ++ return FALSE; ++ if (nvi->predicate >= 0) ++ return FALSE; ++ for (s = 0; s < 4 && nvi->src[s]; ++s) ++ if (nvi->src[s]->value->reg.file == NV_FILE_IMM) ++ return FALSE; ++ return TRUE; ++} ++ ++int ++nvc0_insn_refcount(struct nv_instruction *nvi) ++{ ++ int rc = 0; ++ int i; ++ for (i = 0; i < 5 && nvi->def[i]; ++i) { ++ if (!nvi->def[i]) ++ return rc; ++ rc += nvi->def[i]->refc; ++ } ++ return rc; ++} ++ ++int ++nvc0_pc_replace_value(struct nv_pc *pc, ++ struct nv_value *old_val, ++ struct nv_value *new_val) ++{ ++ int i, n, s; ++ ++ if (old_val == new_val) ++ return old_val->refc; ++ ++ for (i = 0, n = 0; i < pc->num_refs; ++i) { ++ if (pc->refs[i]->value == old_val) { ++ ++n; ++ for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) ++ if (pc->refs[i]->insn->src[s] == pc->refs[i]) ++ break; ++ assert(s < 6); ++ nv_reference(pc, pc->refs[i]->insn, s, new_val); ++ } ++ } ++ return n; ++} ++ ++struct nv_value * ++nvc0_pc_find_constant(struct nv_ref *ref) ++{ ++ struct nv_value *src; ++ ++ if (!ref) ++ return NULL; 
++ ++ src = ref->value; ++ while (src->insn && src->insn->opcode == NV_OP_MOV) { ++ assert(!src->insn->src[0]->mod); ++ src = src->insn->src[0]->value; ++ } ++ if ((src->reg.file == NV_FILE_IMM) || ++ (src->insn && ++ src->insn->opcode == NV_OP_LD && ++ src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && ++ src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) ++ return src; ++ return NULL; ++} ++ ++struct nv_value * ++nvc0_pc_find_immediate(struct nv_ref *ref) ++{ ++ struct nv_value *src = nvc0_pc_find_constant(ref); ++ ++ return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; ++} ++ ++static void ++nv_pc_free_refs(struct nv_pc *pc) ++{ ++ int i; ++ for (i = 0; i < pc->num_refs; i += 64) ++ FREE(pc->refs[i]); ++ FREE(pc->refs); ++} ++ ++static const char * ++edge_name(ubyte type) ++{ ++ switch (type) { ++ case CFG_EDGE_FORWARD: return "forward"; ++ case CFG_EDGE_BACK: return "back"; ++ case CFG_EDGE_LOOP_ENTER: return "loop"; ++ case CFG_EDGE_LOOP_LEAVE: return "break"; ++ case CFG_EDGE_FAKE: return "fake"; ++ default: ++ return "?"; ++ } ++} ++ ++void ++nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, ++ void *priv) ++{ ++ struct nv_basic_block *bb[64], *bbb[16], *b; ++ int j, p, pp; ++ ++ bb[0] = root; ++ p = 1; ++ pp = 0; ++ ++ while (p > 0) { ++ b = bb[--p]; ++ b->priv = 0; ++ ++ for (j = 1; j >= 0; --j) { ++ if (!b->out[j]) ++ continue; ++ ++ switch (b->out_kind[j]) { ++ case CFG_EDGE_BACK: ++ continue; ++ case CFG_EDGE_FORWARD: ++ case CFG_EDGE_FAKE: ++ if (++b->out[j]->priv == b->out[j]->num_in) ++ bb[p++] = b->out[j]; ++ break; ++ case CFG_EDGE_LOOP_ENTER: ++ bb[p++] = b->out[j]; ++ break; ++ case CFG_EDGE_LOOP_LEAVE: ++ bbb[pp++] = b->out[j]; ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ } ++ ++ f(priv, b); ++ ++ if (!p) { ++ p = pp; ++ for (; pp > 0; --pp) ++ bb[pp - 1] = bbb[pp - 1]; ++ } ++ } ++} ++ ++static void ++nv_do_print_function(void *priv, struct nv_basic_block *b) ++{ ++ struct nv_instruction *i; ++ 
++ debug_printf("=== BB %i ", b->id); ++ if (b->out[0]) ++ debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); ++ if (b->out[1]) ++ debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); ++ debug_printf("===\n"); ++ ++ i = b->phi; ++ if (!i) ++ i = b->entry; ++ for (; i; i = i->next) ++ nvc0_print_instruction(i); ++} ++ ++void ++nvc0_print_function(struct nv_basic_block *root) ++{ ++ if (root->subroutine) ++ debug_printf("SUBROUTINE %i\n", root->subroutine); ++ else ++ debug_printf("MAIN\n"); ++ ++ nvc0_pc_pass_in_order(root, nv_do_print_function, root); ++} ++ ++void ++nvc0_print_program(struct nv_pc *pc) ++{ ++ int i; ++ for (i = 0; i < pc->num_subroutines + 1; ++i) ++ if (pc->root[i]) ++ nvc0_print_function(pc->root[i]); ++} ++ ++#if NOUVEAU_DEBUG > 1 ++static void ++nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) ++{ ++ int i; ++ ++ b->pass_seq = pc->pass_seq; ++ ++ fprintf(f, "\t%i [shape=box]\n", b->id); ++ ++ for (i = 0; i < 2; ++i) { ++ if (!b->out[i]) ++ continue; ++ switch (b->out_kind[i]) { ++ case CFG_EDGE_FORWARD: ++ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); ++ break; ++ case CFG_EDGE_LOOP_ENTER: ++ fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); ++ break; ++ case CFG_EDGE_LOOP_LEAVE: ++ fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); ++ break; ++ case CFG_EDGE_BACK: ++ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); ++ continue; ++ case CFG_EDGE_FAKE: ++ fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ if (b->out[i]->pass_seq < pc->pass_seq) ++ nv_do_print_cfgraph(pc, f, b->out[i]); ++ } ++} ++ ++/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. 
*/ ++static void ++nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) ++{ ++ FILE *f; ++ ++ f = fopen(filepath, "a"); ++ if (!f) ++ return; ++ ++ fprintf(f, "digraph G {\n"); ++ ++ ++pc->pass_seq; ++ ++ nv_do_print_cfgraph(pc, f, pc->root[subr]); ++ ++ fprintf(f, "}\n"); ++ ++ fclose(f); ++} ++#endif ++ ++static INLINE void ++nvc0_pc_print_binary(struct nv_pc *pc) ++{ ++ unsigned i; ++ ++ NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); ++ ++ for (i = 0; i < pc->emit_size / 4; i += 2) { ++ debug_printf("0x%08x ", pc->emit[i + 0]); ++ debug_printf("0x%08x ", pc->emit[i + 1]); ++ if ((i % 16) == 15) ++ debug_printf("\n"); ++ } ++ debug_printf("\n"); ++} ++ ++static int ++nvc0_emit_program(struct nv_pc *pc) ++{ ++ uint32_t *code = pc->emit; ++ int n; ++ ++ NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size); ++ ++ pc->emit_pos = 0; ++ for (n = 0; n < pc->num_blocks; ++n) { ++ struct nv_instruction *i; ++ struct nv_basic_block *b = pc->bb_list[n]; ++ ++ for (i = b->entry; i; i = i->next) { ++ nvc0_emit_instruction(pc, i); ++ pc->emit += 2; ++ pc->emit_pos += 8; ++ } ++ } ++ assert(pc->emit == &code[pc->emit_size / 4]); ++ ++ pc->emit[0] = 0x00001de7; ++ pc->emit[1] = 0x80000000; ++ pc->emit_size += 8; ++ ++ pc->emit = code; ++ ++#ifdef NOUVEAU_DEBUG ++ nvc0_pc_print_binary(pc); ++#else ++ debug_printf("not printing binary\n"); ++#endif ++ return 0; ++} ++ ++int ++nvc0_generate_code(struct nvc0_translation_info *ti) ++{ ++ struct nv_pc *pc; ++ int ret; ++ int i; ++ ++ pc = CALLOC_STRUCT(nv_pc); ++ if (!pc) ++ return 1; ++ ++ pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT; ++ ++ pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0])); ++ if (!pc->root) { ++ FREE(pc); ++ return 1; ++ } ++ pc->num_subroutines = ti->num_subrs; ++ ++ ret = nvc0_tgsi_to_nc(pc, ti); ++ if (ret) ++ goto out; ++#if NOUVEAU_DEBUG > 1 ++ nvc0_print_program(pc); ++#endif ++ ++ pc->opt_reload_elim = ti->require_stores ? 
FALSE : TRUE; ++ ++ /* optimization */ ++ ret = nvc0_pc_exec_pass0(pc); ++ if (ret) ++ goto out; ++#ifdef NOUVEAU_DEBUG ++ nvc0_print_program(pc); ++#endif ++ ++ /* register allocation */ ++ ret = nvc0_pc_exec_pass1(pc); ++ if (ret) ++ goto out; ++#if NOUVEAU_DEBUG > 1 ++ nvc0_print_program(pc); ++ nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); ++#endif ++ ++ /* prepare for emission */ ++ ret = nvc0_pc_exec_pass2(pc); ++ if (ret) ++ goto out; ++ assert(!(pc->emit_size % 8)); ++ ++ pc->emit = CALLOC(pc->emit_size / 4 + 2, 4); ++ if (!pc->emit) { ++ ret = 3; ++ goto out; ++ } ++ ret = nvc0_emit_program(pc); ++ if (ret) ++ goto out; ++ ++ ti->prog->code = pc->emit; ++ ti->prog->code_base = 0; ++ ti->prog->code_size = pc->emit_size; ++ ti->prog->parm_size = 0; ++ ++ ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1); ++ ++ ti->prog->relocs = pc->reloc_entries; ++ ti->prog->num_relocs = pc->num_relocs; ++ ++ NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); ++ ++out: ++ nv_pc_free_refs(pc); ++ ++ for (i = 0; i < pc->num_blocks; ++i) ++ FREE(pc->bb_list[i]); ++ if (pc->root) ++ FREE(pc->root); ++ if (ret) { ++ /* on success, these will be referenced by struct nvc0_program */ ++ if (pc->emit) ++ FREE(pc->emit); ++ if (pc->immd_buf) ++ FREE(pc->immd_buf); ++ if (pc->reloc_entries) ++ FREE(pc->reloc_entries); ++ } ++ FREE(pc); ++ return ret; ++} ++ ++static void ++nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) ++{ ++ if (!b->phi) { ++ i->prev = NULL; ++ b->phi = i; ++ i->next = b->entry; ++ if (b->entry) { ++ assert(!b->entry->prev && b->exit); ++ b->entry->prev = i; ++ } else { ++ b->entry = i; ++ b->exit = i; ++ } ++ } else { ++ assert(b->entry); ++ if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ ++ assert(b->entry == b->exit); ++ b->entry->next = i; ++ i->prev = b->entry; ++ b->entry = i; ++ b->exit = i; ++ } else { /* insert before entry */ ++ assert(b->entry->prev && b->exit); ++ i->next = b->entry; ++ 
i->prev = b->entry->prev; ++ b->entry->prev = i; ++ i->prev->next = i; ++ } ++ } ++} ++ ++void ++nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) /* link i at the end of b (PHIs go through nvbb_insert_phi), set i->bb and bump b->num_instructions */ ++{ ++ if (i->opcode == NV_OP_PHI) { ++ nvbb_insert_phi(b, i); ++ } else { ++ i->prev = b->exit; ++ if (b->exit) ++ b->exit->next = i; ++ b->exit = i; ++ if (!b->entry) ++ b->entry = i; ++ else ++ if (i->prev && i->prev->opcode == NV_OP_PHI) ++ b->entry = i; ++ } ++ ++ i->bb = b; ++ b->num_instructions++; ++} ++ ++void ++nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) /* link ni directly behind at; falls back to nvc0_insn_append when at is the last instruction */ ++{ ++ if (!at->next) { ++ nvc0_insn_append(at->bb, ni); ++ return; ++ } ++ ni->next = at->next; ++ ni->prev = at; ++ ni->next->prev = ni; ++ ni->prev->next = ni; ni->bb = at->bb; /* record the owning block as the append path does; nvc0_insn_delete and nvc0_insns_permute dereference ->bb */ ++} ++ ++void ++nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni) /* insert behind at, then swap the two so ni ends up in front */ ++{ ++ nvc0_insn_insert_after(at, ni); ++ nvc0_insns_permute(at, ni); ++} ++ ++void ++nvc0_insn_delete(struct nv_instruction *nvi) /* drop nvi's source references and unlink it from its block, fixing up exit/entry/phi */ ++{ ++ struct nv_basic_block *b = nvi->bb; ++ int s; ++ ++ /* debug_printf("REM: "); nv_print_instruction(nvi); */ ++ ++ for (s = 0; s < 6 && nvi->src[s]; ++s) ++ nv_reference(NULL, nvi, s, NULL); ++ ++ if (nvi->next) ++ nvi->next->prev = nvi->prev; ++ else { ++ assert(nvi == b->exit); ++ b->exit = nvi->prev; ++ } ++ ++ if (nvi->prev) ++ nvi->prev->next = nvi->next; ++ ++ if (nvi == b->entry) { ++ /* PHIs don't get hooked to b->entry */ ++ b->entry = nvi->next; ++ assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); ++ } ++ ++ if (nvi == b->phi) { ++ if (nvi->opcode != NV_OP_PHI) ++ NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n"); ++ ++ assert(!nvi->prev); ++ if (!nvi->next || nvi->next->opcode != NV_OP_PHI) ++ b->phi = NULL; ++ else ++ b->phi = nvi->next; ++ } ++} ++ ++void ++nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2) ++{ ++ struct nv_basic_block *b = i1->bb; ++ ++ assert(i1->opcode != NV_OP_PHI && ++ i2->opcode != NV_OP_PHI); ++ assert(i1->next == i2); ++ ++ if (b->exit == i2) ++
b->exit = i1; ++ ++ if (b->entry == i1) ++ b->entry = i2; ++ ++ i2->prev = i1->prev; ++ i1->next = i2->next; ++ i2->next = i1; ++ i1->prev = i2; ++ ++ if (i2->prev) ++ i2->prev->next = i2; ++ if (i1->next) ++ i1->next->prev = i1; ++} ++ ++void ++nvc0_bblock_attach(struct nv_basic_block *parent, ++ struct nv_basic_block *b, ubyte edge_kind) ++{ ++ assert(b->num_in < 8); ++ ++ if (parent->out[0]) { ++ assert(!parent->out[1]); ++ parent->out[1] = b; ++ parent->out_kind[1] = edge_kind; ++ } else { ++ parent->out[0] = b; ++ parent->out_kind[0] = edge_kind; ++ } ++ ++ b->in[b->num_in] = parent; ++ b->in_kind[b->num_in++] = edge_kind; ++} ++ ++/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ ++ ++boolean ++nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) ++{ ++ int j; ++ ++ if (b == d) ++ return TRUE; ++ ++ for (j = 0; j < b->num_in; ++j) ++ if ((b->in_kind[j] != CFG_EDGE_BACK) && ++ !nvc0_bblock_dominated_by(b->in[j], d)) ++ return FALSE; ++ ++ return j ? 
TRUE : FALSE; ++} ++ ++/* check if @bf (future) can be reached from @bp (past), stop at @bt */ ++boolean ++nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, ++ struct nv_basic_block *bt) ++{ ++ struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; ++ int i, p, n; ++ ++ p = 0; ++ n = 1; ++ q[0] = bp; ++ ++ while (p < n) { ++ b = q[p++]; ++ ++ if (b == bf) ++ break; ++ if (b == bt) ++ continue; ++ assert(n <= (1024 - 2)); ++ ++ for (i = 0; i < 2; ++i) { ++ if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { ++ q[n] = b->out[i]; ++ q[n++]->priv = 1; ++ } ++ } ++ } ++ for (--n; n >= 0; --n) ++ q[n]->priv = 0; ++ ++ return (b == bf); ++} ++ ++static struct nv_basic_block * ++nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) ++{ ++ struct nv_basic_block *out; ++ int i; ++ ++ if (!nvc0_bblock_dominated_by(df, b)) { ++ for (i = 0; i < df->num_in; ++i) { ++ if (df->in_kind[i] == CFG_EDGE_BACK) ++ continue; ++ if (nvc0_bblock_dominated_by(df->in[i], b)) ++ return df; ++ } ++ } ++ for (i = 0; i < 2 && df->out[i]; ++i) { ++ if (df->out_kind[i] == CFG_EDGE_BACK) ++ continue; ++ if ((out = nvbb_find_dom_frontier(b, df->out[i]))) ++ return out; ++ } ++ return NULL; ++} ++ ++struct nv_basic_block * ++nvc0_bblock_dom_frontier(struct nv_basic_block *b) ++{ ++ struct nv_basic_block *df; ++ int i; ++ ++ for (i = 0; i < 2 && b->out[i]; ++i) ++ if ((df = nvbb_find_dom_frontier(b, b->out[i]))) ++ return df; ++ return NULL; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h +new file mode 100644 +index 0000000..969cc68 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc.h +@@ -0,0 +1,653 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef __NVC0_COMPILER_H__ ++#define __NVC0_COMPILER_H__ ++ ++#include ++ ++#ifndef NOUVEAU_DBG ++#ifdef NOUVEAU_DEBUG ++# define NOUVEAU_DBG(args...) debug_printf(args); ++#else ++# define NOUVEAU_DBG(args...) ++#endif ++#endif ++ ++#ifndef NOUVEAU_ERR ++#define NOUVEAU_ERR(fmt, args...) \ ++ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); ++#endif ++ ++#include "pipe/p_defines.h" ++#include "util/u_inlines.h" ++#include "util/u_memory.h" ++#include "util/u_double_list.h" ++ ++/* pseudo opcodes */ ++#define NV_OP_UNDEF 0 ++#define NV_OP_BIND 1 ++#define NV_OP_MERGE 2 ++#define NV_OP_PHI 3 ++#define NV_OP_SELECT 4 ++#define NV_OP_NOP 5 ++ ++/** ++ * BIND forces source operand i into the same register as destination operand i, ++ * and the operands will be assigned consecutive registers (needed for TEX) ++ * SELECT forces its multiple source operands and its destination operand into ++ * one and the same register. 
++ */ ++ ++/* base opcodes */ ++#define NV_OP_LD 6 ++#define NV_OP_ST 7 ++#define NV_OP_MOV 8 ++#define NV_OP_AND 9 ++#define NV_OP_OR 10 ++#define NV_OP_XOR 11 ++#define NV_OP_SHL 12 ++#define NV_OP_SHR 13 ++#define NV_OP_NOT 14 ++#define NV_OP_SET 15 ++#define NV_OP_ADD 16 ++#define NV_OP_SUB 17 ++#define NV_OP_MUL 18 ++#define NV_OP_MAD 19 ++#define NV_OP_ABS 20 ++#define NV_OP_NEG 21 ++#define NV_OP_MAX 22 ++#define NV_OP_MIN 23 ++#define NV_OP_CVT 24 ++#define NV_OP_CEIL 25 ++#define NV_OP_FLOOR 26 ++#define NV_OP_TRUNC 27 ++#define NV_OP_SAD 28 ++ ++/* shader opcodes */ ++#define NV_OP_VFETCH 29 ++#define NV_OP_PFETCH 30 ++#define NV_OP_EXPORT 31 ++#define NV_OP_LINTERP 32 ++#define NV_OP_PINTERP 33 ++#define NV_OP_EMIT 34 ++#define NV_OP_RESTART 35 ++#define NV_OP_TEX 36 ++#define NV_OP_TXB 37 ++#define NV_OP_TXL 38 ++#define NV_OP_TXF 39 ++#define NV_OP_TXQ 40 ++#define NV_OP_QUADOP 41 ++#define NV_OP_DFDX 42 ++#define NV_OP_DFDY 43 ++#define NV_OP_KIL 44 ++ ++/* control flow opcodes */ ++#define NV_OP_BRA 45 ++#define NV_OP_CALL 46 ++#define NV_OP_RET 47 ++#define NV_OP_EXIT 48 ++#define NV_OP_BREAK 49 ++#define NV_OP_BREAKADDR 50 ++#define NV_OP_JOINAT 51 ++#define NV_OP_JOIN 52 ++ ++/* typed opcodes */ ++#define NV_OP_ADD_F32 NV_OP_ADD ++#define NV_OP_ADD_B32 53 ++#define NV_OP_MUL_F32 NV_OP_MUL ++#define NV_OP_MUL_B32 54 ++#define NV_OP_ABS_F32 NV_OP_ABS ++#define NV_OP_ABS_S32 55 ++#define NV_OP_NEG_F32 NV_OP_NEG ++#define NV_OP_NEG_S32 56 ++#define NV_OP_MAX_F32 NV_OP_MAX ++#define NV_OP_MAX_S32 57 ++#define NV_OP_MAX_U32 58 ++#define NV_OP_MIN_F32 NV_OP_MIN ++#define NV_OP_MIN_S32 59 ++#define NV_OP_MIN_U32 60 ++#define NV_OP_SET_F32 61 ++#define NV_OP_SET_S32 62 ++#define NV_OP_SET_U32 63 ++#define NV_OP_SAR 64 ++#define NV_OP_RCP 65 ++#define NV_OP_RSQ 66 ++#define NV_OP_LG2 67 ++#define NV_OP_SIN 68 ++#define NV_OP_COS 69 ++#define NV_OP_EX2 70 ++#define NV_OP_PRESIN 71 ++#define NV_OP_PREEX2 72 ++#define NV_OP_SAT 73 ++ ++/* newly added opcodes 
*/ ++#define NV_OP_SET_F32_AND 74 ++#define NV_OP_SET_F32_OR 75 ++#define NV_OP_SET_F32_XOR 76 ++#define NV_OP_SELP 77 ++#define NV_OP_SLCT 78 ++#define NV_OP_SLCT_F32 NV_OP_SLCT ++#define NV_OP_SLCT_S32 79 ++#define NV_OP_SLCT_U32 80 ++#define NV_OP_SUB_F32 NV_OP_SUB ++#define NV_OP_SUB_S32 81 ++#define NV_OP_MAD_F32 NV_OP_MAD ++#define NV_OP_FSET_F32 82 ++#define NV_OP_TXG 83 ++ ++#define NV_OP_COUNT 84 ++ ++/* nv50 files omitted */ ++#define NV_FILE_GPR 0 ++#define NV_FILE_COND 1 ++#define NV_FILE_PRED 2 ++#define NV_FILE_IMM 16 ++#define NV_FILE_MEM_S 32 ++#define NV_FILE_MEM_V 34 ++#define NV_FILE_MEM_A 35 ++#define NV_FILE_MEM_L 48 ++#define NV_FILE_MEM_G 64 ++#define NV_FILE_MEM_C(i) (80 + (i)) /* argument parenthesized so that an expression such as a ? b : c expands correctly */ ++ ++#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) ++ ++#define NV_MOD_NEG 1 ++#define NV_MOD_ABS 2 ++#define NV_MOD_NOT 4 ++#define NV_MOD_SAT 8 ++ ++#define NV_TYPE_U8 0x00 ++#define NV_TYPE_S8 0x01 ++#define NV_TYPE_U16 0x02 ++#define NV_TYPE_S16 0x03 ++#define NV_TYPE_U32 0x04 ++#define NV_TYPE_S32 0x05 ++#define NV_TYPE_P32 0x07 ++#define NV_TYPE_F32 0x09 ++#define NV_TYPE_F64 0x0b ++#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4)) /* base type in the low nibble, component count above it; n parenthesized before the shift */ ++#define NV_TYPE_ANY 0xff ++ ++#define NV_TYPE_ISINT(t) ((t) < 7) ++#define NV_TYPE_ISSGD(t) ((t) & 1) ++ ++#define NV_CC_FL 0x0 ++#define NV_CC_LT 0x1 ++#define NV_CC_EQ 0x2 ++#define NV_CC_LE 0x3 ++#define NV_CC_GT 0x4 ++#define NV_CC_NE 0x5 ++#define NV_CC_GE 0x6 ++#define NV_CC_U 0x8 ++#define NV_CC_TR 0xf ++#define NV_CC_O 0x10 ++#define NV_CC_C 0x11 ++#define NV_CC_A 0x12 ++#define NV_CC_S 0x13 ++ ++#define NV_PC_MAX_INSTRUCTIONS 2048 ++#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) ++ ++#define NV_PC_MAX_BASIC_BLOCKS 1024 ++ ++struct nv_op_info { ++ uint base; /* e.g.
ADD_S32 -> ADD */ ++ char name[12]; ++ uint8_t type; ++ uint8_t mods; ++ unsigned flow : 1; ++ unsigned commutative : 1; ++ unsigned vector : 1; ++ unsigned predicate : 1; ++ unsigned pseudo : 1; ++ unsigned immediate : 3; ++ unsigned memory : 3; ++}; ++ ++extern struct nv_op_info nvc0_op_info_table[]; ++ ++#define NV_BASEOP(op) (nvc0_op_info_table[op].base) ++#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) ++ ++static INLINE uint ++nv_op_base(uint opcode) ++{ ++ return nvc0_op_info_table[opcode].base; ++} ++ ++static INLINE boolean ++nv_is_texture_op(uint opcode) ++{ ++ return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); ++} ++ ++static INLINE boolean ++nv_is_vector_op(uint opcode) ++{ ++ return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; ++} ++ ++static INLINE boolean ++nv_op_commutative(uint opcode) ++{ ++ return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE; ++} ++ ++static INLINE uint8_t ++nv_op_supported_src_mods(uint opcode) ++{ ++ return nvc0_op_info_table[opcode].mods; ++} ++ ++static INLINE boolean ++nv_op_predicateable(uint opcode) ++{ ++ return nvc0_op_info_table[opcode].predicate ? 
TRUE : FALSE; ++} ++ ++static INLINE uint ++nv_type_order(ubyte type) ++{ ++ switch (type & 0xf) { ++ case NV_TYPE_U8: ++ case NV_TYPE_S8: ++ return 0; ++ case NV_TYPE_U16: ++ case NV_TYPE_S16: ++ return 1; ++ case NV_TYPE_U32: ++ case NV_TYPE_F32: ++ case NV_TYPE_S32: ++ case NV_TYPE_P32: ++ return 2; ++ case NV_TYPE_F64: ++ return 3; ++ } ++ assert(0); ++ return 0; ++} ++ ++static INLINE uint ++nv_type_sizeof(ubyte type) ++{ ++ if (type & 0xf0) ++ return (1 << nv_type_order(type)) * (type >> 4); ++ return 1 << nv_type_order(type); ++} ++ ++static INLINE uint ++nv_type_sizeof_base(ubyte type) ++{ ++ return 1 << nv_type_order(type); ++} ++ ++struct nv_reg { ++ uint32_t address; /* for memory locations */ ++ int id; /* for registers */ ++ ubyte file; ++ ubyte size; ++ union { ++ int32_t s32; ++ int64_t s64; ++ uint64_t u64; ++ uint32_t u32; ++ float f32; ++ double f64; ++ } imm; ++}; ++ ++struct nv_range { ++ struct nv_range *next; ++ int bgn; ++ int end; ++}; ++ ++struct nv_ref; ++ ++struct nv_value { ++ struct nv_reg reg; ++ struct nv_instruction *insn; ++ struct nv_value *join; ++ struct nv_ref *last_use; ++ int n; ++ struct nv_range *livei; ++ int refc; ++ struct nv_value *next; ++ struct nv_value *prev; ++}; ++ ++struct nv_ref { ++ struct nv_value *value; ++ struct nv_instruction *insn; ++ struct list_head list; /* connects uses of the same value */ ++ uint8_t mod; ++ uint8_t flags; ++}; ++ ++struct nv_basic_block; ++ ++struct nv_instruction { ++ struct nv_instruction *next; ++ struct nv_instruction *prev; ++ uint opcode; ++ uint serial; ++ ++ struct nv_value *def[5]; ++ struct nv_ref *src[6]; ++ ++ int8_t predicate; /* index of predicate src */ ++ int8_t indirect; /* index of pointer src */ ++ ++ union { ++ struct { ++ uint8_t t; /* TIC binding */ ++ uint8_t s; /* TSC binding */ ++ } tex; ++ struct { ++ uint8_t d; /* output type */ ++ uint8_t s; /* input type */ ++ } cvt; ++ } ext; ++ ++ struct nv_basic_block *bb; ++ struct nv_basic_block *target; /* target 
block of control flow insn */ ++ ++ unsigned cc : 5; /* condition code */ ++ unsigned fixed : 1; /* don't optimize away (prematurely) */ ++ unsigned terminator : 1; ++ unsigned join : 1; ++ unsigned set_cond : 4; /* 2nd byte */ ++ unsigned saturate : 1; ++ unsigned centroid : 1; ++ unsigned flat : 1; ++ unsigned patch : 1; ++ unsigned lanes : 4; /* 3rd byte */ ++ unsigned tex_dim : 2; ++ unsigned tex_array : 1; ++ unsigned tex_cube : 1; ++ unsigned tex_shadow : 1; /* 4th byte */ ++ unsigned tex_live : 1; ++ unsigned tex_mask : 4; ++ ++ uint8_t quadop; ++}; ++ ++static INLINE int ++nvi_vector_size(struct nv_instruction *nvi) ++{ ++ int i; ++ assert(nvi); ++ for (i = 0; i < 5 && nvi->def[i]; ++i); ++ return i; ++} ++ ++#define CFG_EDGE_FORWARD 0 ++#define CFG_EDGE_BACK 1 ++#define CFG_EDGE_LOOP_ENTER 2 ++#define CFG_EDGE_LOOP_LEAVE 4 ++#define CFG_EDGE_FAKE 8 ++ ++/* 'WALL' edge means where reachability check doesn't follow */ ++/* 'LOOP' edge means just having to do with loops */ ++#define IS_LOOP_EDGE(k) ((k) & 7) ++#define IS_WALL_EDGE(k) ((k) & 9) ++ ++struct nv_basic_block { ++ struct nv_instruction *entry; /* first non-phi instruction */ ++ struct nv_instruction *exit; ++ struct nv_instruction *phi; /* very first instruction */ ++ int num_instructions; ++ ++ struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ ++ struct nv_basic_block *in[8]; /* hope that suffices */ ++ uint num_in; ++ ubyte out_kind[2]; ++ ubyte in_kind[8]; ++ ++ int id; ++ int subroutine; ++ uint priv; /* reset to 0 after you're done */ ++ uint pass_seq; ++ ++ uint32_t emit_pos; /* position, size in emitted code (in bytes) */ ++ uint32_t emit_size; ++ ++ uint32_t live_set[NV_PC_MAX_VALUES / 32]; ++}; ++ ++struct nvc0_translation_info; ++ ++struct nv_pc { ++ struct nv_basic_block **root; ++ struct nv_basic_block *current_block; ++ struct nv_basic_block *parent_block; ++ ++ int loop_nesting_bound; ++ uint pass_seq; ++ ++ struct nv_value values[NV_PC_MAX_VALUES]; ++ struct 
nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; ++ struct nv_ref **refs; ++ struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; ++ int num_values; ++ int num_instructions; ++ int num_refs; ++ int num_blocks; ++ int num_subroutines; ++ ++ int max_reg[4]; ++ ++ uint32_t *immd_buf; /* populated on emit */ ++ unsigned immd_count; ++ ++ uint32_t *emit; ++ uint32_t emit_size; ++ uint32_t emit_pos; ++ ++ void *reloc_entries; ++ unsigned num_relocs; ++ ++ /* optimization enables */ ++ boolean opt_reload_elim; ++ boolean is_fragprog; ++}; ++ ++void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); ++void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *); ++void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); ++ ++static INLINE struct nv_instruction * ++nv_alloc_instruction(struct nv_pc *pc, uint opcode) /* hand out the next slot of pc->instructions with the given opcode; predicate and indirect start at -1 */ ++{ ++ struct nv_instruction *insn; ++ ++ assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); /* check the index before the slot is taken, so the last slot is usable and an overrun is caught first */ ++ insn = &pc->instructions[pc->num_instructions++]; ++ ++ insn->opcode = opcode; ++ insn->cc = 0; ++ insn->indirect = -1; ++ insn->predicate = -1; ++ ++ return insn; ++} ++ ++static INLINE struct nv_instruction * ++new_instruction(struct nv_pc *pc, uint opcode) /* allocate an instruction and append it to pc->current_block */ ++{ ++ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); ++ ++ nvc0_insn_append(pc->current_block, insn); ++ return insn; ++} ++ ++static INLINE struct nv_instruction * ++new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) /* allocate an instruction and link it directly behind at */ ++{ ++ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); ++ ++ nvc0_insn_insert_after(at, insn); ++ return insn; ++} ++ ++static INLINE struct nv_value * ++new_value(struct nv_pc *pc, ubyte file, ubyte size) /* hand out the next slot of pc->values; the value joins itself and has no register id (-1) yet */ ++{ ++ struct nv_value *value = &pc->values[pc->num_values]; ++ ++ assert(pc->num_values < NV_PC_MAX_VALUES - 1); ++ ++ value->n = pc->num_values++; ++ value->join = value; ++ value->reg.id = -1; ++ value->reg.file = file; ++ value->reg.size = size; ++ return value; ++}
++ ++static INLINE struct nv_value * ++new_value_like(struct nv_pc *pc, struct nv_value *like) /* new value with the same register file and size as like */ ++{ ++ return new_value(pc, like->reg.file, like->reg.size); ++} ++ ++static INLINE struct nv_ref * ++new_ref(struct nv_pc *pc, struct nv_value *val) /* take the next nv_ref (allocated 64 at a time), point it at val and bump val->refc */ ++{ ++ int i; ++ struct nv_ref *ref; ++ ++ if ((pc->num_refs % 64) == 0) { ++ const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); ++ const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); ++ ++ pc->refs = REALLOC(pc->refs, old_size, new_size); /* NOTE(review): result is used unchecked and the old array is lost on failure */ ++ ++ ref = CALLOC(64, sizeof(struct nv_ref)); /* NOTE(review): result is used unchecked */ ++ for (i = 0; i < 64; ++i) ++ pc->refs[pc->num_refs + i] = &ref[i]; ++ } ++ ++ ref = pc->refs[pc->num_refs++]; ++ ref->value = val; ++ ++ LIST_INITHEAD(&ref->list); ++ ++ ++val->refc; ++ return ref; ++} ++ ++static INLINE struct nv_basic_block * ++new_basic_block(struct nv_pc *pc) /* allocate a zeroed block and register it in pc->bb_list; returns NULL when the block limit is reached or allocation fails */ ++{ ++ struct nv_basic_block *bb; ++ ++ if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) ++ return NULL; ++ ++ bb = CALLOC_STRUCT(nv_basic_block); ++ if (!bb) return NULL; /* out of memory: report it like the block limit instead of dereferencing NULL */ ++ bb->id = pc->num_blocks; ++ pc->bb_list[pc->num_blocks++] = bb; ++ return bb; ++} ++ ++static INLINE void ++nv_reference(struct nv_pc *pc, ++ struct nv_instruction *nvi, int c, struct nv_value *s) /* make source slot c of nvi refer to s, or clear the slot when s is NULL; keeps refc and the per-value use list up to date */ ++{ ++ struct nv_ref **d = &nvi->src[c]; ++ assert(c < 6); ++ ++ if (*d) { ++ --(*d)->value->refc; ++ LIST_DEL(&(*d)->list); ++ } ++ ++ if (s) { ++ if (!*d) { ++ *d = new_ref(pc, s); ++ (*d)->insn = nvi; ++ } else { ++ LIST_DEL(&(*d)->list); /* NOTE(review): this node was already unlinked a few lines up; confirm a repeated LIST_DEL is harmless */ ++ (*d)->value = s; ++ ++(s->refc); ++ } ++ if (!s->last_use) ++ s->last_use = *d; ++ else ++ LIST_ADDTAIL(&s->last_use->list, &(*d)->list); ++ ++ s->last_use = *d; ++ (*d)->insn = nvi; ++ } else { ++ *d = NULL; ++ } ++} ++ ++/* nvc0_emit.c */ ++void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); ++ ++/* nvc0_print.c */ ++const char *nvc0_opcode_name(uint opcode); ++void nvc0_print_instruction(struct nv_instruction *); ++ ++/* nvc0_pc.c */ ++void nvc0_print_function(struct nv_basic_block *root); ++void nvc0_print_program(struct nv_pc
*); ++ ++boolean nvc0_insn_can_load(struct nv_instruction *, int s, ++ struct nv_instruction *); ++boolean nvc0_insn_is_predicateable(struct nv_instruction *); ++ ++int nvc0_insn_refcount(struct nv_instruction *); ++void nvc0_insn_delete(struct nv_instruction *); ++void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); /* swaps two adjacent non-PHI instructions; prev must directly precede the other one */ ++ ++void nvc0_bblock_attach(struct nv_basic_block *parent, ++ struct nv_basic_block *child, ubyte edge_kind); ++boolean nvc0_bblock_dominated_by(struct nv_basic_block *, ++ struct nv_basic_block *); ++boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, ++ struct nv_basic_block *past, ++ struct nv_basic_block *final); /* TRUE if future can be reached from past; the walk does not continue beyond final */ ++struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); ++ ++int nvc0_pc_replace_value(struct nv_pc *pc, ++ struct nv_value *old_val, ++ struct nv_value *new_val); ++ ++struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); ++struct nv_value *nvc0_pc_find_constant(struct nv_ref *); ++ ++typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); ++ ++void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); ++ ++int nvc0_pc_exec_pass0(struct nv_pc *pc); ++int nvc0_pc_exec_pass1(struct nv_pc *pc); ++int nvc0_pc_exec_pass2(struct nv_pc *pc); ++ ++int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); ++ ++#endif /* __NVC0_COMPILER_H__ */ +diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c +new file mode 100644 +index 0000000..db8055d +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c +@@ -0,0 +1,979 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the
Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "nvc0_pc.h" ++#include "nvc0_program.h" ++ ++#define NVC0_FIXUP_CODE_RELOC 0 ++#define NVC0_FIXUP_DATA_RELOC 1 ++ ++struct nvc0_fixup { ++ uint8_t type; ++ int8_t shift; ++ uint32_t mask; ++ uint32_t data; ++ uint32_t ofst; ++}; ++ ++void ++nvc0_relocate_program(struct nvc0_program *prog, ++ uint32_t code_base, ++ uint32_t data_base) ++{ ++ struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs; ++ unsigned i; ++ ++ for (i = 0; i < prog->num_relocs; ++i) { ++ uint32_t data; ++ ++ switch (f[i].type) { ++ case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; ++ case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; ++ default: ++ data = f[i].data; ++ break; ++ } ++ data = (f[i].shift < 0) ? 
(data >> -f[i].shift) : (data << f[i].shift); ++ ++ prog->code[f[i].ofst / 4] &= ~f[i].mask; ++ prog->code[f[i].ofst / 4] |= data & f[i].mask; ++ } ++} ++ ++static void ++create_fixup(struct nv_pc *pc, uint8_t ty, ++ int w, uint32_t data, uint32_t m, int s) /* append a relocation entry for word w of the instruction at pc->emit_pos; the table grows 8 entries at a time */ ++{ ++ struct nvc0_fixup *f; ++ ++ const unsigned size = sizeof(struct nvc0_fixup); ++ const unsigned n = pc->num_relocs; ++ ++ if (!(n % 8)) ++ pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); /* NOTE(review): result is used unchecked below */ ++ ++ f = (struct nvc0_fixup *)pc->reloc_entries; ++ ++ f[n].ofst = pc->emit_pos + w * 4; ++ f[n].type = ty; ++ f[n].data = data; ++ f[n].mask = m; ++ f[n].shift = s; ++ ++ ++pc->num_relocs; ++} ++ ++static INLINE ubyte ++SSIZE(struct nv_instruction *nvi, int s) /* register size of source s */ ++{ ++ return nvi->src[s]->value->reg.size; ++} ++ ++static INLINE ubyte ++DSIZE(struct nv_instruction *nvi, int d) /* register size of destination d */ ++{ ++ return nvi->def[d]->reg.size; ++} ++ ++static INLINE struct nv_reg * ++SREG(struct nv_ref *ref) /* register of the value a reference is joined to, NULL for a NULL reference */ ++{ ++ if (!ref) ++ return NULL; ++ return &ref->value->join->reg; ++} ++ ++static INLINE struct nv_reg * ++DREG(struct nv_value *val) /* register of the value val is joined to, NULL for a NULL value */ ++{ ++ if (!val) ++ return NULL; ++ return &val->join->reg; ++} ++ ++static INLINE ubyte ++SFILE(struct nv_instruction *nvi, int s) /* register file of source s */ ++{ ++ return nvi->src[s]->value->reg.file; ++} ++ ++static INLINE ubyte ++DFILE(struct nv_instruction *nvi, int d) /* register file of destination d */ ++{ ++ return nvi->def[d]->reg.file; /* honour d like DSIZE and SFILE do; the index used to be hard-wired to 0 */ ++} ++ ++static INLINE void ++SID(struct nv_pc *pc, struct nv_ref *ref, int pos) /* OR the source register id into the encoding at bit pos; 63 stands in for a NULL source */ ++{ ++ pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); ++} ++ ++static INLINE void ++DID(struct nv_pc *pc, struct nv_value *val, int pos) ++{ ++ pc->emit[pos / 32] |= (DREG(val) ?
DREG(val)->id : 63) << (pos % 32); ++} ++ ++static INLINE uint32_t ++get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */ ++{ ++ assert(ref->value->reg.file == NV_FILE_IMM); ++ return ref->value->reg.imm.u32; ++} ++ ++static INLINE void ++set_immd_u32_l(struct nv_pc *pc, uint32_t u32) ++{ ++ pc->emit[0] |= (u32 & 0x3f) << 26; ++ pc->emit[1] |= u32 >> 6; ++} ++ ++static INLINE void ++set_immd_u32(struct nv_pc *pc, uint32_t u32) ++{ ++ if ((pc->emit[0] & 0xf) == 0x2) { ++ set_immd_u32_l(pc, u32); ++ } else ++ if ((pc->emit[0] & 0xf) == 0x3) { ++ assert(!(pc->emit[1] & 0xc000)); ++ pc->emit[1] |= 0xc000; ++ assert(!(u32 & 0xfff00000)); ++ set_immd_u32_l(pc, u32); ++ } else { ++ assert(!(pc->emit[1] & 0xc000)); ++ pc->emit[1] |= 0xc000; ++ assert(!(u32 & 0xfff)); ++ set_immd_u32_l(pc, u32 >> 12); ++ } ++} ++ ++static INLINE void ++set_immd(struct nv_pc *pc, struct nv_instruction *i, int s) ++{ ++ set_immd_u32(pc, get_immd_u32(i->src[s])); ++} ++ ++static INLINE void ++DVS(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ uint s = i->def[0]->reg.size; ++ int n; ++ for (n = 1; n < 4 && i->def[n]; ++n) ++ s += i->def[n]->reg.size; ++ pc->emit[0] |= ((s / 4) - 1) << 5; ++} ++ ++static INLINE void ++SVS(struct nv_pc *pc, struct nv_ref *src) ++{ ++ pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5; ++} ++ ++static void ++set_pred(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ if (i->predicate >= 0) { ++ SID(pc, i->src[i->predicate], 6); ++ if (i->cc) ++ pc->emit[0] |= 0x2000; /* negate */ ++ } else { ++ pc->emit[0] |= 0x1c00; ++ } ++} ++ ++static INLINE void ++set_address_16(struct nv_pc *pc, struct nv_ref *src) ++{ ++ pc->emit[0] |= (src->value->reg.address & 0x003f) << 26; ++ pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6; ++} ++ ++static INLINE unsigned ++const_space_index(struct nv_instruction *i, int s) ++{ ++ return SFILE(i, s) - NV_FILE_MEM_C(0); ++} ++ ++static void ++emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) ++{ ++ 
pc->emit[0] = 0x00000007; ++ pc->emit[1] = op << 24; ++ ++ if (op == 0x40 || (op >= 0x80 && op <= 0x98)) { ++ /* bra, exit, ret or kil */ ++ pc->emit[0] |= 0x1e0; ++ set_pred(pc, i); ++ } ++ ++ if (i->target) { ++ int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); ++ ++ /* we will need relocations only for global functions */ ++ /* ++ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); ++ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); ++ */ ++ ++ pc->emit[0] |= (pcrel & 0x3f) << 26; ++ pc->emit[1] |= (pcrel >> 6) & 0x1ffff; ++ } ++} ++ ++/* doesn't work for vfetch, export, ld, st, mov ... */ ++static void ++emit_form_0(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ int s; ++ ++ set_pred(pc, i); ++ ++ DID(pc, i->def[0], 14); ++ ++ for (s = 0; s < 3 && i->src[s]; ++s) { ++ if (SFILE(i, s) >= NV_FILE_MEM_C(0) && ++ SFILE(i, s) <= NV_FILE_MEM_C(15)) { ++ assert(!(pc->emit[1] & 0xc000)); ++ assert(s <= 1); ++ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); ++ set_address_16(pc, i->src[s]); ++ } else ++ if (SFILE(i, s) == NV_FILE_GPR) { ++ SID(pc, i->src[s], s ? ((s == 2) ? 
49 : 26) : 20); ++ } else ++ if (SFILE(i, s) == NV_FILE_IMM) { ++ assert(!(pc->emit[1] & 0xc000)); ++ assert(s == 1 || i->opcode == NV_OP_MOV); ++ set_immd(pc, i, s); ++ } ++ } ++} ++ ++static void ++emit_form_1(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ int s; ++ ++ set_pred(pc, i); ++ ++ DID(pc, i->def[0], 14); ++ ++ for (s = 0; s < 1 && i->src[s]; ++s) { ++ if (SFILE(i, s) >= NV_FILE_MEM_C(0) && ++ SFILE(i, s) <= NV_FILE_MEM_C(15)) { ++ assert(!(pc->emit[1] & 0xc000)); ++ assert(s <= 1); ++ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); ++ set_address_16(pc, i->src[s]); ++ } else ++ if (SFILE(i, s) == NV_FILE_GPR) { ++ SID(pc, i->src[s], 26); ++ } else ++ if (SFILE(i, s) == NV_FILE_IMM) { ++ assert(!(pc->emit[1] & 0xc000)); ++ assert(s == 1 || i->opcode == NV_OP_MOV); ++ set_immd(pc, i, s); ++ } ++ } ++} ++ ++static void ++emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ if (i->src[0]->mod & NV_MOD_ABS) ++ pc->emit[0] |= 1 << 7; ++ if (i->src[0]->mod & NV_MOD_NEG) ++ pc->emit[0] |= 1 << 9; ++ if (i->src[1]->mod & NV_MOD_ABS) ++ pc->emit[0] |= 1 << 6; ++ if (i->src[1]->mod & NV_MOD_NEG) ++ pc->emit[0] |= 1 << 8; ++} ++ ++static void ++emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x50000000; ++ ++ emit_form_0(pc, i); ++ ++ emit_neg_abs_1_2(pc, i); ++ ++ if (i->saturate) ++ pc->emit[1] |= 1 << 17; ++} ++ ++static void ++emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x58000000; ++ ++ emit_form_0(pc, i); ++ ++ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) ++ pc->emit[1] |= 1 << 25; ++ ++ if (i->saturate) ++ pc->emit[0] |= 1 << 5; ++} ++ ++static void ++emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x30000000; ++ ++ emit_form_0(pc, i); ++ ++ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) ++ pc->emit[0] |= 1 << 9; ++ ++ if (i->src[2]->mod & 
NV_MOD_NEG) ++ pc->emit[0] |= 1 << 8; ++ ++ if (i->saturate) ++ pc->emit[0] |= 1 << 5; ++} ++ ++static void ++emit_minmax(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x08000000; ++ ++ if (NV_BASEOP(i->opcode) == NV_OP_MAX) ++ pc->emit[1] |= 0x001e0000; ++ else ++ pc->emit[1] |= 0x000e0000; /* predicate ? */ ++ ++ emit_form_0(pc, i); ++ ++ emit_neg_abs_1_2(pc, i); ++ ++ switch (i->opcode) { ++ case NV_OP_MIN_U32: ++ case NV_OP_MAX_U32: ++ pc->emit[0] |= 3; ++ break; ++ case NV_OP_MIN_S32: ++ case NV_OP_MAX_S32: ++ pc->emit[0] |= 3 | (1 << 5); ++ break; ++ case NV_OP_MIN_F32: ++ case NV_OP_MAX_F32: ++ default: ++ break; ++ } ++} ++ ++static void ++emit_tex(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ int src1 = i->tex_array + i->tex_dim + i->tex_cube; ++ ++ pc->emit[0] = 0x00000086; ++ pc->emit[1] = 0x80000000; ++ ++ switch (i->opcode) { ++ case NV_OP_TEX: pc->emit[1] = 0x80000000; break; ++ case NV_OP_TXB: pc->emit[1] = 0x84000000; break; ++ case NV_OP_TXL: pc->emit[1] = 0x86000000; break; ++ case NV_OP_TXF: pc->emit[1] = 0x90000000; break; ++ case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; ++ default: ++ assert(0); ++ break; ++ } ++ ++ if (i->tex_array) ++ pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ ++ if (i->tex_shadow) ++ pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ ++ ++ set_pred(pc, i); ++ ++ DID(pc, i->def[0], 14); ++ SID(pc, i->src[0], 20); ++ SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ ++ ++ pc->emit[1] |= i->tex_mask << 14; ++ pc->emit[1] |= (i->tex_dim - 1) << 20; ++ if (i->tex_cube) ++ pc->emit[1] |= 3 << 20; ++ ++ assert(i->ext.tex.s < 16); ++ ++ pc->emit[1] |= i->ext.tex.t; ++ pc->emit[1] |= i->ext.tex.s << 8; ++ ++ if (i->tex_live) ++ pc->emit[0] |= 1 << 9; ++} ++ ++/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */ ++static void ++emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) ++{ ++ pc->emit[0] = 0x00000000; 
++ pc->emit[1] = 0xc8000000; ++ ++ set_pred(pc, i); ++ ++ DID(pc, i->def[0], 14); ++ SID(pc, i->src[0], 20); ++ ++ pc->emit[0] |= op << 26; ++ ++ if (op >= 4) { ++ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; ++ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; ++ } else { ++ assert(!i->src[0]->mod); ++ } ++} ++ ++static void ++emit_quadop(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x48000000; ++ ++ set_pred(pc, i); ++ ++ assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR); ++ ++ DID(pc, i->def[0], 14); ++ SID(pc, i->src[0], 20); ++ SID(pc, i->src[0], 26); ++ ++ pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */ ++ pc->emit[1] |= i->quadop; ++} ++ ++static void ++emit_ddx(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ i->quadop = 0x99; ++ i->lanes = 4; ++ emit_quadop(pc, i); ++} ++ ++static void ++emit_ddy(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ i->quadop = 0xa5; ++ i->lanes = 5; ++ emit_quadop(pc, i); ++} ++ ++/* preparation op (preex2, presin / convert to fixed point) */ ++static void ++emit_preop(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0x60000000; ++ ++ if (i->opcode == NV_OP_PREEX2) ++ pc->emit[0] |= 0x20; ++ ++ emit_form_1(pc, i); ++ ++ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; ++ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6; ++} ++ ++static void ++emit_shift(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000003; ++ ++ switch (i->opcode) { ++ case NV_OP_SAR: ++ pc->emit[0] |= 0x20; /* fall through */ ++ case NV_OP_SHR: ++ pc->emit[1] = 0x58000000; ++ break; ++ case NV_OP_SHL: ++ default: ++ pc->emit[1] = 0x60000000; ++ break; ++ } ++ ++ emit_form_0(pc, i); ++} ++ ++static void ++emit_bitop(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ if (SFILE(i, 1) == NV_FILE_IMM) { ++ pc->emit[0] = 0x00000002; ++ pc->emit[1] = 0x38000000; ++ } else { ++ pc->emit[0] = 
0x00000003; ++ pc->emit[1] = 0x68000000; ++ } ++ ++ switch (i->opcode) { ++ case NV_OP_OR: ++ pc->emit[0] |= 0x40; ++ break; ++ case NV_OP_XOR: ++ pc->emit[0] |= 0x80; ++ break; ++ case NV_OP_AND: ++ default: ++ break; ++ } ++ ++ emit_form_0(pc, i); ++} ++ ++static void ++emit_set(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ ++ switch (i->opcode) { ++ case NV_OP_SET_S32: ++ pc->emit[0] |= 0x20; /* fall through */ ++ case NV_OP_SET_U32: ++ pc->emit[0] |= 0x3; ++ pc->emit[1] = 0x100e0000; ++ break; ++ case NV_OP_SET_F32_AND: ++ pc->emit[1] = 0x18000000; ++ break; ++ case NV_OP_SET_F32_OR: ++ pc->emit[1] = 0x18200000; ++ break; ++ case NV_OP_SET_F32_XOR: ++ pc->emit[1] = 0x18400000; ++ break; ++ case NV_OP_FSET_F32: ++ pc->emit[0] |= 0x20; /* fall through */ ++ case NV_OP_SET_F32: ++ default: ++ pc->emit[1] = 0x180e0000; ++ break; ++ } ++ ++ if (DFILE(i, 0) == NV_FILE_PRED) { ++ pc->emit[0] |= 0x1c000; ++ pc->emit[1] += 0x08000000; ++ } ++ ++ pc->emit[1] |= i->set_cond << 23; ++ ++ emit_form_0(pc, i); ++ ++ emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */ ++} ++ ++static void ++emit_selp(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000004; ++ pc->emit[1] = 0x20000000; ++ ++ emit_form_0(pc, i); ++ ++ if (i->cc || (i->src[2]->mod & NV_MOD_NOT)) ++ pc->emit[1] |= 1 << 20; ++} ++ ++static void ++emit_slct(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ ++ switch (i->opcode) { ++ case NV_OP_SLCT_S32: ++ pc->emit[0] |= 0x20; /* fall through */ ++ case NV_OP_SLCT_U32: ++ pc->emit[0] |= 0x3; ++ pc->emit[1] = 0x30000000; ++ break; ++ case NV_OP_SLCT_F32: ++ default: ++ pc->emit[1] = 0x38000000; ++ break; ++ } ++ ++ emit_form_0(pc, i); ++ ++ pc->emit[1] |= i->set_cond << 23; ++} ++ ++static void ++emit_cvt(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000004; ++ pc->emit[1] = 0x10000000; ++ ++ if (i->opcode != NV_OP_CVT) ++ i->ext.cvt.d = 
i->ext.cvt.s = NV_OPTYPE(i->opcode); ++ ++ switch (i->ext.cvt.d) { ++ case NV_TYPE_F32: ++ switch (i->ext.cvt.s) { ++ case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; ++ case NV_TYPE_S32: pc->emit[0] |= 0x200; ++ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; ++ } ++ break; ++ case NV_TYPE_S32: pc->emit[0] |= 0x80; ++ case NV_TYPE_U32: ++ switch (i->ext.cvt.s) { ++ case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; ++ case NV_TYPE_S32: pc->emit[0] |= 0x200; ++ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; ++ } ++ break; ++ default: ++ assert(!"cvt: unknown type"); ++ break; ++ } ++ ++ if (i->opcode == NV_OP_FLOOR) ++ pc->emit[1] |= 0x00020000; ++ else ++ if (i->opcode == NV_OP_CEIL) ++ pc->emit[1] |= 0x00040000; ++ else ++ if (i->opcode == NV_OP_TRUNC) ++ pc->emit[1] |= 0x00060000; ++ ++ if (i->saturate || i->opcode == NV_OP_SAT) ++ pc->emit[0] |= 0x20; ++ ++ if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS) ++ pc->emit[0] |= 1 << 6; ++ if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG) ++ pc->emit[0] |= 1 << 8; ++ ++ pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; ++ pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; ++ ++ emit_form_1(pc, i); ++} ++ ++static void ++emit_interp(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000000; ++ pc->emit[1] = 0xc07e0000; ++ ++ DID(pc, i->def[0], 14); ++ ++ set_pred(pc, i); ++ ++ if (i->indirect >= 0) /* negative means no indirect source; a plain truth test would index src[-1] */ ++ SID(pc, i->src[i->indirect], 20); ++ else ++ SID(pc, NULL, 20); ++ ++ if (i->opcode == NV_OP_PINTERP) { ++ pc->emit[0] |= 0x040; ++ SID(pc, i->src[1], 26); ++ } else { ++ SID(pc, NULL, 26); ++ } ++ ++ pc->emit[1] |= i->src[0]->value->reg.address & 0xffff; ++ ++ if (i->centroid) ++ pc->emit[0] |= 0x100; ++ else ++ if (i->flat) ++ pc->emit[0] |= 0x080; ++} ++ ++static void ++emit_vfetch(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x03f00006; ++ pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address; ++ if (i->patch) ++
pc->emit[0] |= 0x100; ++ ++ set_pred(pc, i); ++ ++ DVS(pc, i); ++ DID(pc, i->def[0], 14); ++ ++ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26); ++} ++ ++static void ++emit_export(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000006; ++ pc->emit[1] = 0x0a000000; ++ if (i->patch) ++ pc->emit[0] |= 0x100; ++ ++ set_pred(pc, i); ++ ++ assert(SFILE(i, 0) == NV_FILE_MEM_V); ++ assert(SFILE(i, 1) == NV_FILE_GPR); ++ ++ SID(pc, i->src[1], 26); /* register source */ ++ SVS(pc, i->src[0]); ++ ++ pc->emit[1] |= i->src[0]->value->reg.address & 0xfff; ++ ++ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); ++} ++ ++static void ++emit_mov(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ if (i->opcode == NV_OP_MOV) ++ i->lanes = 0xf; ++ ++ if (SFILE(i, 0) == NV_FILE_IMM) { ++ pc->emit[0] = 0x000001e2; ++ pc->emit[1] = 0x18000000; ++ } else ++ if (SFILE(i, 0) == NV_FILE_PRED) { ++ pc->emit[0] = 0x1c000004; ++ pc->emit[1] = 0x080e0000; ++ } else { ++ pc->emit[0] = 0x00000004 | (i->lanes << 5); ++ pc->emit[1] = 0x28000000; ++ } ++ ++ emit_form_1(pc, i); ++} ++ ++static void ++emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ assert(NV_IS_MEMORY_FILE(SFILE(i, 0))); ++ ++ switch (SSIZE(i, 0)) { ++ case 1: ++ if (NV_TYPE_ISSGD(i->ext.cvt.s)) ++ pc->emit[0] |= 0x20; ++ break; ++ case 2: ++ pc->emit[0] |= 0x40; ++ if (NV_TYPE_ISSGD(i->ext.cvt.s)) ++ pc->emit[0] |= 0x20; ++ break; ++ case 4: pc->emit[0] |= 0x80; break; ++ case 8: pc->emit[0] |= 0xa0; break; ++ case 16: pc->emit[0] |= 0xc0; break; ++ default: ++ NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0)); ++ break; ++ } ++} ++ ++static void ++emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ pc->emit[0] = 0x00000006; ++ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); ++ ++ emit_ldst_size(pc, i); ++ ++ set_pred(pc, i); ++ set_address_16(pc, i->src[0]); ++ ++ SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 20); ++ DID(pc, i->def[0], 14); ++} ++ ++static void ++emit_ld(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && ++ SFILE(i, 0) <= NV_FILE_MEM_C(15)) { ++ if (SSIZE(i, 0) == 4 && i->indirect < 0) { ++ i->lanes = 0xf; ++ emit_mov(pc, i); ++ } else { ++ emit_ld_const(pc, i); ++ } ++ } else { ++ NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); ++ abort(); ++ } ++} ++ ++static void ++emit_st(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ NOUVEAU_ERR("emit_st: not handled yet\n"); ++ abort(); ++} ++ ++void ++nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) ++{ ++ debug_printf("EMIT: "); nvc0_print_instruction(i); ++ ++ switch (i->opcode) { ++ case NV_OP_VFETCH: ++ emit_vfetch(pc, i); ++ break; ++ case NV_OP_EXPORT: ++ if (!pc->is_fragprog) ++ emit_export(pc, i); ++ break; ++ case NV_OP_MOV: ++ emit_mov(pc, i); ++ break; ++ case NV_OP_LD: ++ emit_ld(pc, i); ++ break; ++ case NV_OP_ST: ++ emit_st(pc, i); ++ break; ++ case NV_OP_LINTERP: ++ case NV_OP_PINTERP: ++ emit_interp(pc, i); ++ break; ++ case NV_OP_ADD_F32: ++ emit_add_f32(pc, i); ++ break; ++ case NV_OP_AND: ++ case NV_OP_OR: ++ case NV_OP_XOR: ++ emit_bitop(pc, i); ++ break; ++ case NV_OP_CVT: ++ case NV_OP_ABS_F32: ++ case NV_OP_ABS_S32: ++ case NV_OP_NEG_F32: ++ case NV_OP_NEG_S32: ++ case NV_OP_SAT: ++ case NV_OP_CEIL: ++ case NV_OP_FLOOR: ++ case NV_OP_TRUNC: ++ emit_cvt(pc, i); ++ break; ++ case NV_OP_DFDX: ++ emit_ddx(pc, i); ++ break; ++ case NV_OP_DFDY: ++ emit_ddy(pc, i); ++ break; ++ case NV_OP_COS: ++ emit_flop(pc, i, 0); ++ break; ++ case NV_OP_SIN: ++ emit_flop(pc, i, 1); ++ break; ++ case NV_OP_EX2: ++ emit_flop(pc, i, 2); ++ break; ++ case NV_OP_LG2: ++ emit_flop(pc, i, 3); ++ break; ++ case NV_OP_RCP: ++ emit_flop(pc, i, 4); ++ break; ++ case NV_OP_RSQ: ++ emit_flop(pc, i, 5); ++ break; ++ case NV_OP_PRESIN: ++ case NV_OP_PREEX2: ++ emit_preop(pc, i); ++ break; ++ case NV_OP_MAD_F32: ++ 
emit_mad_f32(pc, i); ++ break; ++ case NV_OP_MAX_F32: ++ case NV_OP_MAX_S32: ++ case NV_OP_MAX_U32: ++ case NV_OP_MIN_F32: ++ case NV_OP_MIN_S32: ++ case NV_OP_MIN_U32: ++ emit_minmax(pc, i); ++ break; ++ case NV_OP_MUL_F32: ++ emit_mul_f32(pc, i); ++ break; ++ case NV_OP_SET_F32: ++ case NV_OP_SET_F32_AND: ++ case NV_OP_SET_F32_OR: ++ case NV_OP_SET_F32_XOR: ++ case NV_OP_SET_S32: ++ case NV_OP_SET_U32: ++ case NV_OP_FSET_F32: ++ emit_set(pc, i); ++ break; ++ case NV_OP_SHL: ++ case NV_OP_SHR: ++ case NV_OP_SAR: ++ emit_shift(pc, i); ++ break; ++ case NV_OP_TEX: ++ case NV_OP_TXB: ++ case NV_OP_TXL: ++ emit_tex(pc, i); ++ break; ++ case NV_OP_BRA: ++ emit_flow(pc, i, 0x40); ++ break; ++ case NV_OP_CALL: ++ emit_flow(pc, i, 0x50); ++ break; ++ case NV_OP_JOINAT: ++ emit_flow(pc, i, 0x60); ++ break; ++ case NV_OP_EXIT: ++ emit_flow(pc, i, 0x80); ++ break; ++ case NV_OP_RET: ++ emit_flow(pc, i, 0x90); ++ break; ++ case NV_OP_KIL: ++ emit_flow(pc, i, 0x98); ++ break; ++ case NV_OP_JOIN: ++ case NV_OP_NOP: ++ pc->emit[0] = 0x00003de4; ++ pc->emit[1] = 0x40000000; ++ break; ++ case NV_OP_SELP: ++ emit_selp(pc, i); ++ break; ++ case NV_OP_SLCT_F32: ++ case NV_OP_SLCT_S32: ++ case NV_OP_SLCT_U32: ++ emit_slct(pc, i); ++ break; ++ default: ++ NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); ++ abort(); ++ break; ++ } ++ ++ if (i->join) ++ pc->emit[0] |= 0x10; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +new file mode 100644 +index 0000000..acc72bf +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +@@ -0,0 +1,1236 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell 
copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "nvc0_pc.h" ++#include "nvc0_program.h" ++ ++#define DESCEND_ARBITRARY(j, f) \ ++do { \ ++ b->pass_seq = ctx->pc->pass_seq; \ ++ \ ++ for (j = 0; j < 2; ++j) \ ++ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ ++ f(ctx, b->out[j]); \ ++} while (0) ++ ++static INLINE boolean ++registers_interfere(struct nv_value *a, struct nv_value *b) ++{ ++ if (a->reg.file != b->reg.file) ++ return FALSE; ++ if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) ++ return FALSE; ++ ++ assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); ++ ++ if (a->join->reg.id < b->join->reg.id) { ++ return (a->join->reg.id + a->reg.size >= b->join->reg.id); ++ } else ++ if (a->join->reg.id > b->join->reg.id) { ++ return (b->join->reg.id + b->reg.size >= a->join->reg.id); ++ } ++ ++ return FALSE; ++} ++ ++static INLINE boolean ++values_equal(struct nv_value *a, struct nv_value *b) ++{ ++ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) ++ return FALSE; ++ if (NV_IS_MEMORY_FILE(a->reg.file)) ++ return a->reg.address == b->reg.address; ++ else ++ return a->join->reg.id == b->join->reg.id; ++} ++ ++#if 0 ++static INLINE boolean ++inst_commutation_check(struct nv_instruction *a, struct 
nv_instruction *b) ++{ ++ int si, di; ++ ++ for (di = 0; di < 4 && a->def[di]; ++di) ++ for (si = 0; si < 5 && b->src[si]; ++si) ++ if (registers_interfere(a->def[di], b->src[si]->value)) ++ return FALSE; ++ ++ return TRUE; ++} ++ ++/* Check whether we can swap the order of the instructions, ++ * where a & b may be either the earlier or the later one. ++ */ ++static boolean ++inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b) ++{ ++ return inst_commutation_check(a, b) && inst_commutation_check(b, a); ++} ++#endif ++ ++static INLINE boolean ++inst_removable(struct nv_instruction *nvi) ++{ ++ if (nvi->opcode == NV_OP_ST) ++ return FALSE; ++ return (!(nvi->terminator || ++ nvi->join || ++ nvi->target || ++ nvi->fixed || ++ nvc0_insn_refcount(nvi))); ++} ++ ++static INLINE boolean ++inst_is_noop(struct nv_instruction *nvi) ++{ ++ if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND) ++ return TRUE; ++ if (nvi->terminator || nvi->join) ++ return FALSE; ++ if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) ++ return TRUE; ++ if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) ++ return FALSE; ++ if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file) ++ return FALSE; ++ ++ if (nvi->src[0]->value->join->reg.id < 0) { ++ NOUVEAU_DBG("inst_is_noop: orphaned value detected\n"); ++ return TRUE; ++ } ++ ++ if (nvi->opcode == NV_OP_SELECT) ++ if (!values_equal(nvi->def[0], nvi->src[1]->value)) ++ return FALSE; ++ return values_equal(nvi->def[0], nvi->src[0]->value); ++} ++ ++struct nv_pass { ++ struct nv_pc *pc; ++ int n; ++ void *priv; ++}; ++ ++static int ++nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b); ++ ++static void ++nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) ++{ ++ struct nv_pc *pc = (struct nv_pc *)priv; ++ struct nv_basic_block *in; ++ struct nv_instruction *nvi, *next; ++ int j; ++ ++ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); ++ ++ if (j >= 0) { ++ in = 
pc->bb_list[j]; ++ ++ /* check for no-op branches (BRA $PC+8) */ ++ if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) { ++ in->emit_size -= 8; ++ pc->emit_size -= 8; ++ ++ for (++j; j < pc->num_blocks; ++j) ++ pc->bb_list[j]->emit_pos -= 8; ++ ++ nvc0_insn_delete(in->exit); ++ } ++ b->emit_pos = in->emit_pos + in->emit_size; ++ } ++ ++ pc->bb_list[pc->num_blocks++] = b; ++ ++ /* visit node */ ++ ++ for (nvi = b->entry; nvi; nvi = next) { ++ next = nvi->next; ++ if (inst_is_noop(nvi) || ++ (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) { ++ nvc0_insn_delete(nvi); ++ } else ++ b->emit_size += 8; ++ } ++ pc->emit_size += b->emit_size; ++ ++#ifdef NOUVEAU_DEBUG ++ if (!b->entry) ++ debug_printf("BB:%i is now empty\n", b->id); ++ else ++ debug_printf("BB:%i size = %u\n", b->id, b->emit_size); ++#endif ++} ++ ++static int ++nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) ++{ ++ struct nv_pass pass; ++ ++ pass.pc = pc; ++ ++ pc->pass_seq++; ++ nv_pass_flatten(&pass, root); ++ ++ nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); ++ ++ return 0; ++} ++ ++int ++nvc0_pc_exec_pass2(struct nv_pc *pc) ++{ ++ int i, ret; ++ ++ NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks); ++ ++ pc->num_blocks = 0; /* will reorder bb_list */ ++ ++ for (i = 0; i < pc->num_subroutines + 1; ++i) ++ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) ++ return ret; ++ return 0; ++} ++ ++static INLINE boolean ++is_cspace_load(struct nv_instruction *nvi) ++{ ++ if (!nvi) ++ return FALSE; ++ assert(nvi->indirect != 0); ++ return (nvi->opcode == NV_OP_LD && ++ nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && ++ nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); ++} ++ ++static INLINE boolean ++is_immd32_load(struct nv_instruction *nvi) ++{ ++ if (!nvi) ++ return FALSE; ++ return (nvi->opcode == NV_OP_MOV && ++ nvi->src[0]->value->reg.file == NV_FILE_IMM && ++ nvi->src[0]->value->reg.size == 4); ++} ++ ++static INLINE void 
++check_swap_src_0_1(struct nv_instruction *nvi) ++{ ++ static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; ++ ++ struct nv_ref *src0 = nvi->src[0]; ++ struct nv_ref *src1 = nvi->src[1]; ++ ++ if (!nv_op_commutative(nvi->opcode)) ++ return; ++ assert(src0 && src1 && src0->value && src1->value); ++ ++ if (is_cspace_load(src0->value->insn)) { ++ if (!is_cspace_load(src1->value->insn)) { ++ nvi->src[0] = src1; ++ nvi->src[1] = src0; ++ } ++ } else ++ if (is_immd32_load(src0->value->insn)) { ++ if (!is_cspace_load(src1->value->insn) && ++ !is_immd32_load(src1->value->insn)) { ++ nvi->src[0] = src1; ++ nvi->src[1] = src0; ++ } ++ } ++ ++ if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) ++ nvi->set_cond = cc_swapped[nvi->set_cond]; ++} ++ ++static void ++nvi_set_indirect_load(struct nv_pc *pc, ++ struct nv_instruction *nvi, struct nv_value *val) ++{ ++ for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; ++ ++nvi->indirect); ++ assert(nvi->indirect < 6); ++ nv_reference(pc, nvi, nvi->indirect, val); ++} ++ ++static int ++nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *nvi, *ld; ++ int s; ++ ++ for (nvi = b->entry; nvi; nvi = nvi->next) { ++ check_swap_src_0_1(nvi); ++ ++ for (s = 0; s < 3 && nvi->src[s]; ++s) { ++ ld = nvi->src[s]->value->insn; ++ if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) ++ continue; ++ if (!nvc0_insn_can_load(nvi, s, ld)) ++ continue; ++ ++ /* fold it ! */ ++ nv_reference(ctx->pc, nvi, s, ld->src[0]->value); ++ if (ld->indirect >= 0) ++ nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value); ++ ++ if (!nvc0_insn_refcount(ld)) ++ nvc0_insn_delete(ld); ++ } ++ } ++ DESCEND_ARBITRARY(s, nvc0_pass_fold_loads); ++ ++ return 0; ++} ++ ++/* NOTE: Assumes loads have not yet been folded. 
*/ ++static int ++nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *nvi, *mi, *next; ++ int j; ++ uint8_t mod; ++ ++ for (nvi = b->entry; nvi; nvi = next) { ++ next = nvi->next; ++ if (nvi->opcode == NV_OP_SUB) { ++ nvi->src[1]->mod ^= NV_MOD_NEG; ++ nvi->opcode = NV_OP_ADD; ++ } ++ ++ for (j = 0; j < 3 && nvi->src[j]; ++j) { ++ mi = nvi->src[j]->value->insn; ++ if (!mi) ++ continue; ++ if (mi->def[0]->refc > 1 || mi->predicate >= 0) ++ continue; ++ ++ if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG; ++ else ++ if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS; ++ else ++ continue; ++ assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); ++ ++ mod |= mi->src[0]->mod; ++ ++ if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { ++ /* abs neg [abs] = abs */ ++ mod &= ~(NV_MOD_NEG | NV_MOD_ABS); ++ } else ++ if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { ++ /* neg as opcode and modifier on same insn cannot occur */ ++ /* neg neg abs = abs, neg neg = identity */ ++ assert(j == 0); ++ if (mod & NV_MOD_ABS) ++ nvi->opcode = NV_OP_ABS; ++ else ++ nvi->opcode = NV_OP_MOV; ++ mod = 0; ++ } ++ ++ if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod) ++ continue; ++ ++ nv_reference(ctx->pc, nvi, j, mi->src[0]->value); ++ ++ nvi->src[j]->mod ^= mod; ++ } ++ ++ if (nvi->opcode == NV_OP_SAT) { ++ mi = nvi->src[0]->value->insn; /* may be NULL, as guarded for in the source loop above */ ++ ++ if (!mi || mi->def[0]->refc > 1 || ++ (mi->opcode != NV_OP_ADD && ++ mi->opcode != NV_OP_MUL && ++ mi->opcode != NV_OP_MAD)) ++ continue; ++ mi->saturate = 1; ++ mi->def[0] = nvi->def[0]; ++ mi->def[0]->insn = mi; ++ nvc0_insn_delete(nvi); ++ } ++ } ++ DESCEND_ARBITRARY(j, nv_pass_lower_mods); ++ ++ return 0; ++} ++ ++#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) ++ ++static void ++apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) ++{ ++ if (mod & NV_MOD_ABS) { ++ if (type == NV_TYPE_F32) ++ *val &= 0x7fffffff; ++ else ++ if ((*val) & (1 << 31))
++ *val = ~(*val) + 1; ++ } ++ if (mod & NV_MOD_NEG) { ++ if (type == NV_TYPE_F32) ++ *val ^= 0x80000000; ++ else ++ *val = ~(*val) + 1; ++ } ++ if (mod & NV_MOD_SAT) { ++ union { ++ float f; ++ uint32_t u; ++ int32_t i; ++ } u; ++ u.u = *val; ++ if (type == NV_TYPE_F32) { ++ u.f = CLAMP(u.f, -1.0f, 1.0f); ++ } else ++ if (type == NV_TYPE_U16) { ++ u.u = MIN2(u.u, 0xffff); ++ } else ++ if (type == NV_TYPE_S16) { ++ u.i = CLAMP(u.i, -32768, 32767); ++ } ++ *val = u.u; ++ } ++ if (mod & NV_MOD_NOT) ++ *val = ~*val; ++} ++ ++static void ++constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, ++ struct nv_value *src0, struct nv_value *src1) ++{ ++ struct nv_value *val; ++ union { ++ float f32; ++ uint32_t u32; ++ int32_t s32; ++ } u0, u1, u; ++ ubyte type; ++ ++ if (!nvi->def[0]) ++ return; ++ type = NV_OPTYPE(nvi->opcode); ++ ++ u.u32 = 0; ++ u0.u32 = src0->reg.imm.u32; ++ u1.u32 = src1->reg.imm.u32; ++ ++ apply_modifiers(&u0.u32, type, nvi->src[0]->mod); ++ apply_modifiers(&u1.u32, type, nvi->src[1]->mod); ++ ++ switch (nvi->opcode) { ++ case NV_OP_MAD_F32: ++ if (nvi->src[2]->value->reg.file != NV_FILE_GPR) ++ return; ++ /* fall through */ ++ case NV_OP_MUL_F32: ++ u.f32 = u0.f32 * u1.f32; ++ break; ++ case NV_OP_MUL_B32: ++ u.u32 = u0.u32 * u1.u32; ++ break; ++ case NV_OP_ADD_F32: ++ u.f32 = u0.f32 + u1.f32; ++ break; ++ case NV_OP_ADD_B32: ++ u.u32 = u0.u32 + u1.u32; ++ break; ++ case NV_OP_SUB_F32: ++ u.f32 = u0.f32 - u1.f32; ++ break; ++ /* ++ case NV_OP_SUB_B32: ++ u.u32 = u0.u32 - u1.u32; ++ break; ++ */ ++ default: ++ return; ++ } ++ ++ val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); ++ val->reg.imm.u32 = u.u32; ++ ++ nv_reference(pc, nvi, 1, NULL); ++ nv_reference(pc, nvi, 0, val); ++ ++ if (nvi->opcode == NV_OP_MAD_F32) { ++ nvi->src[1] = nvi->src[0]; ++ nvi->src[0] = nvi->src[2]; ++ nvi->src[2] = NULL; ++ nvi->opcode = NV_OP_ADD_F32; ++ ++ if (val->reg.imm.u32 == 0) { ++ nvi->src[1] = NULL; ++ nvi->opcode = NV_OP_MOV; ++ } ++ } else { 
++ nvi->opcode = NV_OP_MOV; ++ } ++} ++ ++static void ++constant_operand(struct nv_pc *pc, ++ struct nv_instruction *nvi, struct nv_value *val, int s) ++{ ++ union { ++ float f32; ++ uint32_t u32; ++ int32_t s32; ++ } u; ++ int shift; ++ int t = s ? 0 : 1; ++ uint op; ++ ubyte type; ++ ++ if (!nvi->def[0]) ++ return; ++ type = NV_OPTYPE(nvi->opcode); ++ ++ u.u32 = val->reg.imm.u32; ++ apply_modifiers(&u.u32, type, nvi->src[s]->mod); ++ ++ if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) { ++ nvi->opcode = NV_OP_MOV; ++ nv_reference(pc, nvi, t, NULL); ++ if (s) { ++ nvi->src[0] = nvi->src[1]; ++ nvi->src[1] = NULL; ++ } ++ return; ++ } ++ ++ switch (nvi->opcode) { ++ case NV_OP_MUL_F32: ++ if (u.f32 == 1.0f || u.f32 == -1.0f) { ++ if (u.f32 == -1.0f) ++ nvi->src[t]->mod ^= NV_MOD_NEG; ++ switch (nvi->src[t]->mod) { ++ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; ++ case NV_MOD_NEG: op = NV_OP_NEG_F32; break; ++ case NV_MOD_ABS: op = NV_OP_ABS_F32; break; ++ default: ++ return; ++ } ++ nvi->opcode = op; ++ nv_reference(pc, nvi, 0, nvi->src[t]->value); ++ nv_reference(pc, nvi, 1, NULL); ++ nvi->src[0]->mod = 0; ++ } else ++ if (u.f32 == 2.0f || u.f32 == -2.0f) { ++ if (u.f32 == -2.0f) ++ nvi->src[t]->mod ^= NV_MOD_NEG; ++ nvi->opcode = NV_OP_ADD_F32; ++ nv_reference(pc, nvi, s, nvi->src[t]->value); ++ nvi->src[s]->mod = nvi->src[t]->mod; ++ } ++ case NV_OP_ADD_F32: ++ if (u.u32 == 0) { ++ switch (nvi->src[t]->mod) { ++ case 0: op = nvi->saturate ? 
NV_OP_SAT : NV_OP_MOV; break; ++ case NV_MOD_NEG: op = NV_OP_NEG_F32; break; ++ case NV_MOD_ABS: op = NV_OP_ABS_F32; break; ++ case NV_MOD_NEG | NV_MOD_ABS: ++ op = NV_OP_CVT; ++ nvi->ext.cvt.s = nvi->ext.cvt.d = type; ++ break; ++ default: ++ return; ++ } ++ nvi->opcode = op; ++ nv_reference(pc, nvi, 0, nvi->src[t]->value); ++ nv_reference(pc, nvi, 1, NULL); ++ if (nvi->opcode != NV_OP_CVT) ++ nvi->src[0]->mod = 0; ++ } ++ case NV_OP_ADD_B32: ++ if (u.u32 == 0) { ++ assert(nvi->src[t]->mod == 0); ++ nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV; ++ nvi->ext.cvt.s = nvi->ext.cvt.d = type; ++ nv_reference(pc, nvi, 0, nvi->src[t]->value); ++ nv_reference(pc, nvi, 1, NULL); ++ } ++ break; ++ case NV_OP_MUL_B32: ++ /* multiplication by 0 already handled above */ ++ assert(nvi->src[s]->mod == 0); ++ shift = ffs(u.s32) - 1; ++ if (shift == 0) { ++ nvi->opcode = NV_OP_MOV; ++ nv_reference(pc, nvi, 0, nvi->src[t]->value); ++ nv_reference(pc, nvi, 1, NULL); ++ } else ++ if (u.s32 > 0 && u.s32 == (1 << shift)) { ++ nvi->opcode = NV_OP_SHL; ++ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; ++ nv_reference(pc, nvi, 0, nvi->src[t]->value); ++ nv_reference(pc, nvi, 1, val); ++ break; ++ } ++ break; ++ case NV_OP_RCP: ++ u.f32 = 1.0f / u.f32; ++ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; ++ nvi->opcode = NV_OP_MOV; ++ assert(s == 0); ++ nv_reference(pc, nvi, 0, val); ++ break; ++ case NV_OP_RSQ: ++ u.f32 = 1.0f / sqrtf(u.f32); ++ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; ++ nvi->opcode = NV_OP_MOV; ++ assert(s == 0); ++ nv_reference(pc, nvi, 0, val); ++ break; ++ default: ++ break; ++ } ++} ++ ++static int ++nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *nvi, *next; ++ int j; ++ ++ for (nvi = b->entry; nvi; nvi = next) { ++ struct nv_value *src0, *src1, *src; ++ int s; ++ uint8_t mod[4]; ++ ++ next = nvi->next; ++ ++ src0 = 
nvc0_pc_find_immediate(nvi->src[0]); ++ src1 = nvc0_pc_find_immediate(nvi->src[1]); ++ ++ if (src0 && src1) ++ constant_expression(ctx->pc, nvi, src0, src1); ++ else { ++ if (src0) ++ constant_operand(ctx->pc, nvi, src0, 0); ++ else ++ if (src1) ++ constant_operand(ctx->pc, nvi, src1, 1); ++ } ++ ++ /* check if we can MUL + ADD -> MAD/FMA */ ++ if (nvi->opcode != NV_OP_ADD) ++ continue; ++ ++ src0 = nvi->src[0]->value; ++ src1 = nvi->src[1]->value; ++ ++ if (SRC_IS_MUL(src0) && src0->refc == 1) ++ src = src0; ++ else ++ if (SRC_IS_MUL(src1) && src1->refc == 1) ++ src = src1; ++ else ++ continue; ++ ++ /* could have an immediate from above constant_* */ ++ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) ++ continue; ++ s = (src == src0) ? 0 : 1; ++ ++ mod[0] = nvi->src[0]->mod; ++ mod[1] = nvi->src[1]->mod; ++ mod[2] = src->insn->src[0]->mod; ++ mod[3] = src->insn->src[1]->mod; /* modifier of the MUL's second operand, applied to the MAD's src[1] below */ ++ ++ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) ++ continue; ++ ++ nvi->opcode = NV_OP_MAD; ++ nv_reference(ctx->pc, nvi, s, NULL); ++ nvi->src[2] = nvi->src[!s]; ++ ++ nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); ++ nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); ++ nvi->src[0]->mod = mod[2] ^ mod[s]; ++ nvi->src[1]->mod = mod[3]; ++ } ++ DESCEND_ARBITRARY(j, nv_pass_lower_arith); ++ ++ return 0; ++} ++ ++/* TODO: redundant store elimination */ ++ ++struct mem_record { ++ struct mem_record *next; ++ struct nv_instruction *insn; ++ uint32_t ofst; ++ uint32_t base; ++ uint32_t size; ++}; ++ ++#define MEM_RECORD_POOL_SIZE 1024 ++ ++struct pass_reld_elim { ++ struct nv_pc *pc; ++ ++ struct mem_record *imm; ++ struct mem_record *mem_v; ++ struct mem_record *mem_a; ++ struct mem_record *mem_c[16]; ++ struct mem_record *mem_l; ++ ++ struct mem_record pool[MEM_RECORD_POOL_SIZE]; ++ int alloc; ++}; ++ ++static void ++combine_load(struct mem_record *rec, struct nv_instruction *ld) ++{ ++ struct nv_instruction *fv = rec->insn; ++ struct nv_value
*mem = ld->src[0]->value; ++ uint32_t size = rec->size + mem->reg.size; ++ int j; ++ int d = rec->size / 4; ++ ++ assert(rec->size < 16); ++ if (rec->ofst > mem->reg.address) { ++ if ((size == 8 && mem->reg.address & 3) || ++ (size > 8 && mem->reg.address & 7)) ++ return; ++ rec->ofst = mem->reg.address; ++ for (j = 0; j < d; ++j) ++ fv->def[d + j] = fv->def[j]; ++ d = 0; ++ } else ++ if ((size == 8 && rec->ofst & 3) || ++ (size > 8 && rec->ofst & 7)) { ++ return; ++ } ++ ++ for (j = 0; j < mem->reg.size / 4; ++j) { ++ fv->def[d] = ld->def[j]; ++ fv->def[d++]->insn = fv; ++ } ++ ++ fv->src[0]->value->reg.size = rec->size = size; ++ ++ nvc0_insn_delete(ld); ++} ++ ++static void ++combine_export(struct mem_record *rec, struct nv_instruction *ex) ++{ ++ ++} ++ ++static INLINE void ++add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec, ++ uint32_t base, uint32_t ofst, struct nv_instruction *nvi) ++{ ++ struct mem_record *it = &ctx->pool[ctx->alloc++]; ++ ++ it->next = *rec; ++ *rec = it; ++ it->base = base; ++ it->ofst = ofst; ++ it->insn = nvi; ++ it->size = nvi->src[0]->value->reg.size; ++} ++ ++/* vectorize and reuse loads from memory or of immediates */ ++static int ++nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) ++{ ++ struct mem_record **rec, *it; ++ struct nv_instruction *ld, *next; ++ struct nv_value *mem; ++ uint32_t base, ofst; ++ int s; ++ ++ for (ld = b->entry; ld; ld = next) { ++ next = ld->next; ++ ++ if (is_cspace_load(ld)) { ++ mem = ld->src[0]->value; ++ rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)]; ++ } else ++ if (ld->opcode == NV_OP_VFETCH) { ++ mem = ld->src[0]->value; ++ rec = &ctx->mem_a; ++ } else ++ if (ld->opcode == NV_OP_EXPORT) { ++ mem = ld->src[0]->value; ++ if (mem->reg.file != NV_FILE_MEM_V) ++ continue; ++ rec = &ctx->mem_v; ++ } else { ++ continue; ++ } ++ if (ld->def[0] && ld->def[0]->refc == 0) ++ continue; ++ ofst = mem->reg.address; ++ base = (ld->indirect >= 0) ? 
ld->src[ld->indirect]->value->n : 0; ++ ++ for (it = *rec; it; it = it->next) { ++ if (it->base == base && ++ ((it->ofst >> 4) == (ofst >> 4)) && ++ ((it->ofst + it->size == ofst) || ++ (it->ofst - mem->reg.size == ofst))) { ++ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) ++ continue; ++ if (it->ofst < ofst) { ++ if ((it->ofst & 0xf) == 4) ++ continue; ++ } else ++ if ((ofst & 0xf) == 4) ++ continue; ++ break; ++ } ++ } ++ if (it) { ++ switch (ld->opcode) { ++ case NV_OP_EXPORT: combine_export(it, ld); break; ++ default: ++ combine_load(it, ld); ++ break; ++ } ++ } else ++ if (ctx->alloc < MEM_RECORD_POOL_SIZE) { ++ add_mem_record(ctx, rec, base, ofst, ld); ++ } ++ } ++ ++ DESCEND_ARBITRARY(s, nv_pass_mem_opt); ++ return 0; ++} ++ ++static void ++eliminate_store(struct mem_record *rec, struct nv_instruction *st) ++{ ++} ++ ++/* elimination of redundant stores */ ++static int ++pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) ++{ ++ struct mem_record **rec, *it; ++ struct nv_instruction *st, *next; ++ struct nv_value *mem; ++ uint32_t base, ofst, size; ++ int s; ++ ++ for (st = b->entry; st; st = next) { ++ next = st->next; ++ ++ if (st->opcode == NV_OP_ST) { ++ mem = st->src[0]->value; ++ rec = &ctx->mem_l; ++ } else ++ if (st->opcode == NV_OP_EXPORT) { ++ mem = st->src[0]->value; ++ if (mem->reg.file != NV_FILE_MEM_V) ++ continue; ++ rec = &ctx->mem_v; ++ } else ++ if (st->opcode == NV_OP_ST) { ++ /* TODO: purge */ ++ } ++ ofst = mem->reg.address; ++ base = (st->indirect >= 0) ? 
st->src[st->indirect]->value->n : 0; ++ size = mem->reg.size; ++ ++ for (it = *rec; it; it = it->next) { ++ if (it->base == base && ++ (it->ofst <= ofst && (it->ofst + size) > ofst)) ++ break; ++ } ++ if (it) ++ eliminate_store(it, st); ++ else ++ add_mem_record(ctx, rec, base, ofst, st); ++ } ++ ++ DESCEND_ARBITRARY(s, nv_pass_mem_opt); ++ return 0; ++} ++ ++/* TODO: properly handle loads from l[] memory in the presence of stores */ ++static int ++nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) ++{ ++#if 0 ++ struct load_record **rec, *it; ++ struct nv_instruction *ld, *next; ++ uint64_t data[2]; ++ struct nv_value *val; ++ int j; ++ ++ for (ld = b->entry; ld; ld = next) { ++ next = ld->next; ++ if (!ld->src[0]) ++ continue; ++ val = ld->src[0]->value; ++ rec = NULL; ++ ++ if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { ++ data[0] = val->reg.id; ++ data[1] = 0; ++ rec = &ctx->mem_v; ++ } else ++ if (ld->opcode == NV_OP_LDA) { ++ data[0] = val->reg.id; ++ data[1] = ld->src[4] ? 
ld->src[4]->value->n : ~0ULL; ++ if (val->reg.file >= NV_FILE_MEM_C(0) && ++ val->reg.file <= NV_FILE_MEM_C(15)) ++ rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; ++ else ++ if (val->reg.file == NV_FILE_MEM_S) ++ rec = &ctx->mem_s; ++ else ++ if (val->reg.file == NV_FILE_MEM_L) ++ rec = &ctx->mem_l; ++ } else ++ if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { ++ data[0] = val->reg.imm.u32; ++ data[1] = 0; ++ rec = &ctx->imm; ++ } ++ ++ if (!rec || !ld->def[0]->refc) ++ continue; ++ ++ for (it = *rec; it; it = it->next) ++ if (it->data[0] == data[0] && it->data[1] == data[1]) ++ break; ++ ++ if (it) { ++ if (ld->def[0]->reg.id >= 0) ++ it->value = ld->def[0]; ++ else ++ if (!ld->fixed) ++ nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value); ++ } else { ++ if (ctx->alloc == LOAD_RECORD_POOL_SIZE) ++ continue; ++ it = &ctx->pool[ctx->alloc++]; ++ it->next = *rec; ++ it->data[0] = data[0]; ++ it->data[1] = data[1]; ++ it->value = ld->def[0]; ++ *rec = it; ++ } ++ } ++ ++ ctx->imm = NULL; ++ ctx->mem_s = NULL; ++ ctx->mem_v = NULL; ++ for (j = 0; j < 16; ++j) ++ ctx->mem_c[j] = NULL; ++ ctx->mem_l = NULL; ++ ctx->alloc = 0; ++ ++ DESCEND_ARBITRARY(j, nv_pass_reload_elim); ++#endif ++ return 0; ++} ++ ++static int ++nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ int i, c, j; ++ ++ for (i = 0; i < ctx->pc->num_instructions; ++i) { ++ struct nv_instruction *nvi = &ctx->pc->instructions[i]; ++ struct nv_value *def[4]; ++ ++ if (!nv_is_texture_op(nvi->opcode)) ++ continue; ++ nvi->tex_mask = 0; ++ ++ for (c = 0; c < 4; ++c) { ++ if (nvi->def[c]->refc) ++ nvi->tex_mask |= 1 << c; ++ def[c] = nvi->def[c]; ++ } ++ ++ j = 0; ++ for (c = 0; c < 4; ++c) ++ if (nvi->tex_mask & (1 << c)) ++ nvi->def[j++] = def[c]; ++ for (c = 0; c < 4; ++c) ++ if (!(nvi->tex_mask & (1 << c))) ++ nvi->def[j++] = def[c]; ++ assert(j == 4); ++ } ++ return 0; ++} ++ ++struct nv_pass_dce { ++ struct nv_pc *pc; ++ uint removed; ++}; ++ ++static int 
++nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) ++{ ++ int j; ++ struct nv_instruction *nvi, *next; ++ ++ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { ++ next = nvi->next; ++ ++ if (inst_removable(nvi)) { ++ nvc0_insn_delete(nvi); ++ ++ctx->removed; ++ } ++ } ++ DESCEND_ARBITRARY(j, nv_pass_dce); ++ ++ return 0; ++} ++ ++#if 0 ++/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. ++ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with ++ * BREAK and dummy ELSE block. ++ */ ++static INLINE boolean ++bb_is_if_else_endif(struct nv_basic_block *bb) ++{ ++ if (!bb->out[0] || !bb->out[1]) ++ return FALSE; ++ ++ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { ++ return (bb->out[0]->out[1] == bb->out[1]->out[0] && ++ !bb->out[1]->out[1]); ++ } else { ++ return (bb->out[0]->out[0] == bb->out[1]->out[0] && ++ !bb->out[0]->out[1] && ++ !bb->out[1]->out[1]); ++ } ++} ++ ++/* predicate instructions and remove branch at the end */ ++static void ++predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, ++ struct nv_value *p, ubyte cc) ++{ ++ ++} ++#endif ++ ++/* NOTE: Run this after register allocation, we can just cut out the cflow ++ * instructions and hook the predicates to the conditional OPs if they are ++ * not using immediates; better than inserting SELECT to join definitions. ++ * ++ * NOTE: Should adapt prior optimization to make this possible more often. ++ */ ++static int ++nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ return 0; ++} ++ ++/* local common subexpression elimination, stupid O(n^2) implementation */ ++static int ++nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *ir, *ik, *next; ++ struct nv_instruction *entry = b->phi ? 
b->phi : b->entry; ++ int s; ++ unsigned int reps; ++ ++ do { ++ reps = 0; ++ for (ir = entry; ir; ir = next) { ++ next = ir->next; ++ for (ik = entry; ik != ir; ik = ik->next) { ++ if (ir->opcode != ik->opcode || ir->fixed) ++ continue; ++ ++ if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) ++ continue; ++ ++ if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) ++ continue; ++ ++ if (!values_equal(ik->def[0], ir->def[0])) ++ continue; ++ ++ for (s = 0; s < 3; ++s) { ++ struct nv_value *a, *b; ++ ++ if (!ik->src[s]) { ++ if (ir->src[s]) ++ break; ++ continue; ++ } ++ if (ik->src[s]->mod != ir->src[s]->mod) ++ break; ++ a = ik->src[s]->value; ++ b = ir->src[s]->value; ++ if (a == b) ++ continue; ++ if (a->reg.file != b->reg.file || ++ a->reg.id < 0 || ++ a->reg.id != b->reg.id) ++ break; ++ } ++ if (s == 3) { ++ nvc0_insn_delete(ir); ++ ++reps; ++ nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); ++ break; ++ } ++ } ++ } ++ } while(reps); ++ ++ DESCEND_ARBITRARY(s, nv_pass_cse); ++ ++ return 0; ++} ++ ++/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy ++ * neighbouring registers. CSE might have messed this up. 
++ */ ++static int ++nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_value *val; ++ struct nv_instruction *bnd, *nvi, *next; ++ int s, t; ++ ++ for (bnd = b->entry; bnd; bnd = next) { ++ next = bnd->next; ++ if (bnd->opcode != NV_OP_BIND) ++ continue; ++ for (s = 0; s < 4 && bnd->src[s]; ++s) { ++ val = bnd->src[s]->value; ++ for (t = s + 1; t < 4 && bnd->src[t]; ++t) { ++ if (bnd->src[t]->value != val) ++ continue; ++ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); ++ nvi->def[0] = new_value_like(ctx->pc, val); ++ nvi->def[0]->insn = nvi; ++ nv_reference(ctx->pc, nvi, 0, val); ++ nvc0_insn_insert_before(bnd, nvi); ++ ++ nv_reference(ctx->pc, bnd, t, nvi->def[0]); ++ } ++ } ++ } ++ DESCEND_ARBITRARY(t, nv_pass_fix_bind); ++ ++ return 0; ++} ++ ++static int ++nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) ++{ ++ struct pass_reld_elim *reldelim; ++ struct nv_pass pass; ++ struct nv_pass_dce dce; ++ int ret; ++ ++ pass.n = 0; ++ pass.pc = pc; ++ ++ /* Do this first, so we don't have to pay attention ++ * to whether sources are supported memory loads. 
++ */ ++ pc->pass_seq++; ++ ret = nv_pass_lower_arith(&pass, root); ++ if (ret) ++ return ret; ++ ++ pc->pass_seq++; ++ ret = nv_pass_lower_mods(&pass, root); ++ if (ret) ++ return ret; ++ ++ pc->pass_seq++; ++ ret = nvc0_pass_fold_loads(&pass, root); ++ if (ret) ++ return ret; ++ ++ if (pc->opt_reload_elim) { ++ reldelim = CALLOC_STRUCT(pass_reld_elim); ++ reldelim->pc = pc; ++ ++ pc->pass_seq++; ++ ret = nv_pass_reload_elim(reldelim, root); ++ if (ret) { ++ FREE(reldelim); ++ return ret; ++ } ++ memset(reldelim, 0, sizeof(struct pass_reld_elim)); ++ reldelim->pc = pc; ++ } ++ ++ pc->pass_seq++; ++ ret = nv_pass_cse(&pass, root); ++ if (ret) ++ return ret; ++ ++ dce.pc = pc; ++ do { ++ dce.removed = 0; ++ pc->pass_seq++; ++ ret = nv_pass_dce(&dce, root); ++ if (ret) ++ return ret; ++ } while (dce.removed); ++ ++ if (pc->opt_reload_elim) { ++ pc->pass_seq++; ++ ret = nv_pass_mem_opt(reldelim, root); ++ if (!ret) { ++ memset(reldelim, 0, sizeof(struct pass_reld_elim)); ++ reldelim->pc = pc; ++ ++ pc->pass_seq++; ++ ret = nv_pass_mem_opt(reldelim, root); ++ } ++ FREE(reldelim); ++ if (ret) ++ return ret; ++ } ++ ++ ret = nv_pass_tex_mask(&pass, root); ++ if (ret) ++ return ret; ++ ++ pc->pass_seq++; ++ ret = nv_pass_fix_bind(&pass, root); ++ ++ return ret; ++} ++ ++int ++nvc0_pc_exec_pass0(struct nv_pc *pc) ++{ ++ int i, ret; ++ ++ for (i = 0; i < pc->num_subroutines + 1; ++i) ++ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) ++ return ret; ++ return 0; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c +new file mode 100644 +index 0000000..b038264 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c +@@ -0,0 +1,377 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without 
limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "nvc0_pc.h" ++ ++#define PRINT(args...) debug_printf(args) ++ ++#ifndef ARRAY_SIZE ++#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) ++#endif ++ ++static const char *norm = "\x1b[00m"; ++static const char *gree = "\x1b[32m"; ++static const char *blue = "\x1b[34m"; ++static const char *cyan = "\x1b[36m"; ++static const char *yllw = "\x1b[33m"; ++static const char *mgta = "\x1b[35m"; ++ ++static const char *nv_cond_names[] = ++{ ++ "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", ++ "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", ++ "o", "c", "a", "s" ++}; ++ ++static const char *nv_modifier_strings[] = ++{ ++ "", ++ "neg", ++ "abs", ++ "neg abs", ++ "not", ++ "not neg", ++ "not abs", ++ "not neg abs", ++ "sat", ++ "BAD_MOD" /* entry 9: nv_modifier_string() clamps its index to this slot */ ++}; ++ ++const char * ++nvc0_opcode_name(uint opcode) ++{ ++ return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; ++} ++ ++static INLINE const char * ++nv_type_name(ubyte type, ubyte size) ++{ ++ switch (type) { ++ case NV_TYPE_U16: return "u16"; ++ case NV_TYPE_S16: return "s16"; ++ case NV_TYPE_F32: return "f32"; ++ case NV_TYPE_U32: return "u32"; ++ case 
NV_TYPE_S32: return "s32"; ++ case NV_TYPE_P32: return "p32"; ++ case NV_TYPE_F64: return "f64"; ++ case NV_TYPE_ANY: ++ { ++ switch (size) { ++ case 1: return "b8"; ++ case 2: return "b16"; ++ case 4: return "b32"; ++ case 8: return "b64"; ++ case 12: return "b96"; ++ case 16: return "b128"; ++ default: ++ return "BAD_SIZE"; ++ } ++ } ++ default: ++ return "BAD_TYPE"; ++ } ++} ++ ++static INLINE const char * ++nv_cond_name(ubyte cc) ++{ ++ return nv_cond_names[MIN2(cc, 19)]; ++} ++ ++static INLINE const char * ++nv_modifier_string(ubyte mod) ++{ ++ return nv_modifier_strings[MIN2(mod, 9)]; ++} ++ ++static INLINE int ++nv_value_id(struct nv_value *value) ++{ ++ if (value->join->reg.id >= 0) ++ return value->join->reg.id; ++ return value->n; ++} ++ ++static INLINE boolean ++nv_value_allocated(struct nv_value *value) ++{ ++ return (value->reg.id >= 0) ? TRUE : FALSE; ++} ++ ++static INLINE void ++nv_print_address(const char c, int buf, struct nv_value *a, int offset) ++{ ++ const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; ++ char sg; ++ ++ if (offset < 0) { ++ sg = '-'; ++ offset = -offset; ++ } else { ++ sg = '+'; ++ } ++ ++ if (buf >= 0) ++ PRINT(" %s%c%i[", cyan, c, buf); ++ else ++ PRINT(" %s%c[", cyan, c); ++ if (a) ++ PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); ++ PRINT("%s0x%x%s]", yllw, offset, cyan); ++} ++ ++static INLINE void ++nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) ++{ ++ char reg_pfx = nv_value_allocated(value->join) ? 
'$' : '%'; ++ ++ if (value->reg.file != NV_FILE_PRED) ++ PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); ++ ++ switch (value->reg.file) { ++ case NV_FILE_GPR: ++ PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); ++ if (value->reg.size == 8) ++ PRINT("d"); ++ if (value->reg.size == 16) ++ PRINT("q"); ++ break; ++ case NV_FILE_PRED: ++ PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); ++ break; ++ case NV_FILE_COND: ++ PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); ++ break; ++ case NV_FILE_MEM_L: ++ nv_print_address('l', -1, indir, value->reg.address); ++ break; ++ case NV_FILE_MEM_G: ++ nv_print_address('g', -1, indir, value->reg.address); ++ break; ++ case NV_FILE_MEM_A: ++ nv_print_address('a', -1, indir, value->reg.address); ++ break; ++ case NV_FILE_MEM_V: ++ nv_print_address('v', -1, indir, value->reg.address); ++ break; ++ case NV_FILE_IMM: ++ switch (type) { ++ case NV_TYPE_U16: ++ case NV_TYPE_S16: ++ PRINT(" %s0x%04x", yllw, value->reg.imm.u32); ++ break; ++ case NV_TYPE_F32: ++ PRINT(" %s%f", yllw, value->reg.imm.f32); ++ break; ++ case NV_TYPE_F64: ++ PRINT(" %s%f", yllw, value->reg.imm.f64); ++ break; ++ case NV_TYPE_U32: ++ case NV_TYPE_S32: ++ case NV_TYPE_P32: ++ case NV_TYPE_ANY: ++ PRINT(" %s0x%08x", yllw, value->reg.imm.u32); ++ break; ++ } ++ break; ++ default: ++ if (value->reg.file >= NV_FILE_MEM_C(0) && ++ value->reg.file <= NV_FILE_MEM_C(15)) ++ nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir, ++ value->reg.address); ++ else ++ NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value)); ++ break; ++ } ++} ++ ++static INLINE void ++nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type) ++{ ++ nv_print_value(ref->value, indir, type); ++} ++ ++void ++nvc0_print_instruction(struct nv_instruction *i) ++{ ++ int s; ++ ++ PRINT("%i: ", i->serial); ++ ++ if (i->predicate >= 0) { ++ PRINT("%s%s", gree, i->cc ? 
"fl" : "tr"); ++ nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8); ++ PRINT(" "); ++ } ++ ++ PRINT("%s", gree); ++ if (NV_BASEOP(i->opcode) == NV_OP_SET) ++ PRINT("set %s", nv_cond_name(i->set_cond)); ++ else ++ if (i->saturate) ++ PRINT("sat %s", nvc0_opcode_name(i->opcode)); ++ else ++ PRINT("%s", nvc0_opcode_name(i->opcode)); ++ ++ if (i->opcode == NV_OP_CVT) ++ nv_print_value(i->def[0], NULL, i->ext.cvt.d); ++ else ++ if (i->def[0]) ++ nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode)); ++ else ++ if (i->target) ++ PRINT(" %s(BB:%i)", yllw, i->target->id); ++ else ++ PRINT(" #"); ++ ++ for (s = 1; s < 4 && i->def[s]; ++s) ++ nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode)); ++ if (s > 1) ++ PRINT("%s ,", norm); ++ ++ for (s = 0; s < 6 && i->src[s]; ++s) { ++ ubyte type; ++ if (s == i->indirect || s == i->predicate) ++ continue; ++ if (i->opcode == NV_OP_CVT) ++ type = i->ext.cvt.s; ++ else ++ type = NV_OPTYPE(i->opcode); ++ ++ if (i->src[s]->mod) ++ PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod)); ++ ++ if (i->indirect >= 0 && ++ NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) ++ nv_print_ref(i->src[s], i->src[i->indirect]->value, type); ++ else ++ nv_print_ref(i->src[s], NULL, type); ++ } ++ PRINT(" %s\n", norm); ++} ++ ++#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG ++ ++struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = ++{ ++ { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, ++ { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, ++ { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, ++ { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, ++ { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, ++ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, ++ ++ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_MOV, "mov", NV_TYPE_ANY, 
0, 0, 0, 0, 1, 0, 1, 0 }, ++ { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, ++ { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, ++ { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, ++ { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, ++ { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, ++ { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, ++ { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, ++ { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, ++ { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, ++ { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, ++ { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, ++ { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_TXB, "texbias", 
NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ ++ { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, ++ { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, ++ { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, ++ { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, ++ { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, ++ { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, ++ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, ++ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, ++ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_SIN, 
"sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, ++ { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, ++ ++ { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, ++ ++ { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, ++ ++ { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, ++ ++ { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } ++}; +diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +new file mode 100644 +index 0000000..d24f09a +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +@@ -0,0 +1,927 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included 
in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#define NOUVEAU_DEBUG 1 ++ ++/* #define NVC0_RA_DEBUG_LIVEI */ ++/* #define NVC0_RA_DEBUG_LIVE_SETS */ ++/* #define NVC0_RA_DEBUG_JOIN */ ++ ++#include "nvc0_pc.h" ++#include "util/u_simple_list.h" ++ ++#define NVC0_NUM_REGISTER_FILES 3 ++ ++/* @unit_shift: log2 of min allocation unit for register */ ++struct register_set { ++ uint32_t bits[NVC0_NUM_REGISTER_FILES][2]; ++ uint32_t last[NVC0_NUM_REGISTER_FILES]; ++ int log2_unit[NVC0_NUM_REGISTER_FILES]; ++ struct nv_pc *pc; ++}; ++ ++struct nv_pc_pass { ++ struct nv_pc *pc; ++ struct nv_instruction **insns; ++ uint num_insns; ++ uint pass_seq; ++}; ++ ++static void ++ranges_coalesce(struct nv_range *range) ++{ ++ while (range->next && range->end >= range->next->bgn) { ++ struct nv_range *rnn = range->next->next; ++ assert(range->bgn <= range->next->bgn); ++ range->end = MAX2(range->end, range->next->end); ++ FREE(range->next); ++ range->next = rnn; ++ } ++} ++ ++static boolean ++add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) ++{ ++ struct nv_range *range, **nextp = &val->livei; ++ ++ for (range = val->livei; range; range = range->next) { ++ if (end < range->bgn) ++ break; /* insert before */ ++ ++ if (bgn > range->end) { ++ nextp = &range->next; ++ continue; /* insert after */ ++ } ++ ++ /* overlap */ ++ if (bgn < range->bgn) { ++ range->bgn = bgn; ++ if (end > range->end) ++ range->end = end; ++ ranges_coalesce(range); ++ return TRUE; ++ } 
++ if (end > range->end) { ++ range->end = end; ++ ranges_coalesce(range); ++ return TRUE; ++ } ++ assert(bgn >= range->bgn); ++ assert(end <= range->end); ++ return TRUE; ++ } ++ ++ if (!new_range) ++ new_range = CALLOC_STRUCT(nv_range); ++ ++ new_range->bgn = bgn; ++ new_range->end = end; ++ new_range->next = range; ++ *(nextp) = new_range; ++ return FALSE; ++} ++ ++static void ++add_range(struct nv_value *val, struct nv_basic_block *b, int end) ++{ ++ int bgn; ++ ++ if (!val->insn) /* ignore non-def values */ ++ return; ++ assert(b->entry->serial <= b->exit->serial); ++ assert(b->phi->serial <= end); ++ assert(b->exit->serial + 1 >= end); ++ ++ bgn = val->insn->serial; ++ if (bgn < b->entry->serial || bgn > b->exit->serial) ++ bgn = b->entry->serial; ++ ++ assert(bgn <= end); ++ ++ add_range_ex(val, bgn, end, NULL); ++} ++ ++#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI) ++static void ++livei_print(struct nv_value *a) ++{ ++ struct nv_range *r = a->livei; ++ ++ debug_printf("livei %i: ", a->n); ++ while (r) { ++ debug_printf("[%i, %i) ", r->bgn, r->end); ++ r = r->next; ++ } ++ debug_printf("\n"); ++} ++#endif ++ ++static void ++livei_unify(struct nv_value *dst, struct nv_value *src) ++{ ++ struct nv_range *range, *next; ++ ++ for (range = src->livei; range; range = next) { ++ next = range->next; ++ if (add_range_ex(dst, range->bgn, range->end, range)) ++ FREE(range); ++ } ++ src->livei = NULL; ++} ++ ++static void ++livei_release(struct nv_value *val) ++{ ++ struct nv_range *range, *next; ++ ++ for (range = val->livei; range; range = next) { ++ next = range->next; ++ FREE(range); ++ } ++} ++ ++static boolean ++livei_have_overlap(struct nv_value *a, struct nv_value *b) ++{ ++ struct nv_range *r_a, *r_b; ++ ++ for (r_a = a->livei; r_a; r_a = r_a->next) { ++ for (r_b = b->livei; r_b; r_b = r_b->next) { ++ if (r_b->bgn < r_a->end && ++ r_b->end > r_a->bgn) ++ return TRUE; ++ } ++ } ++ return FALSE; ++} ++ ++static int ++livei_end(struct nv_value 
*a) ++{ ++ struct nv_range *r = a->livei; ++ ++ assert(r); ++ while (r->next) ++ r = r->next; ++ return r->end; ++} ++ ++static boolean ++livei_contains(struct nv_value *a, int pos) ++{ ++ struct nv_range *r; ++ ++ for (r = a->livei; r && r->bgn <= pos; r = r->next) ++ if (r->end > pos) ++ return TRUE; ++ return FALSE; ++} ++ ++static boolean ++reg_assign(struct register_set *set, struct nv_value **def, int n) ++{ ++ int i, id, s, k; ++ uint32_t m; ++ int f = def[0]->reg.file; ++ ++ k = n; ++ if (k == 3) ++ k = 4; ++ s = (k * def[0]->reg.size) >> set->log2_unit[f]; ++ m = (1 << s) - 1; ++ ++ id = set->last[f]; ++ ++ for (i = 0; i * 32 < set->last[f]; ++i) { ++ if (set->bits[f][i] == 0xffffffff) ++ continue; ++ ++ for (id = 0; id < 32; id += s) ++ if (!(set->bits[f][i] & (m << id))) ++ break; ++ if (id < 32) ++ break; ++ } ++ if (i * 32 + id > set->last[f]) ++ return FALSE; ++ ++ set->bits[f][i] |= m << id; ++ ++ id += i * 32; ++ ++ set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1); ++ ++ for (i = 0; i < n; ++i) ++ if (def[i]->livei) ++ def[i]->reg.id = id++; ++ ++ return TRUE; ++} ++ ++static INLINE void ++reg_occupy(struct register_set *set, struct nv_value *val) ++{ ++ int id = val->reg.id, f = val->reg.file; ++ uint32_t m; ++ ++ if (id < 0) ++ return; ++ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; ++ ++ set->bits[f][id / 32] |= m << (id % 32); ++ ++ if (set->pc->max_reg[f] < id) ++ set->pc->max_reg[f] = id; ++} ++ ++static INLINE void ++reg_release(struct register_set *set, struct nv_value *val) ++{ ++ int id = val->reg.id, f = val->reg.file; ++ uint32_t m; ++ ++ if (id < 0) ++ return; ++ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; ++ ++ set->bits[f][id / 32] &= ~(m << (id % 32)); ++} ++ ++static INLINE boolean ++join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) ++{ ++ int i; ++ struct nv_value *val; ++ ++ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) ++ return FALSE; ++ ++ if 
(a->join->reg.id == b->join->reg.id) ++ return TRUE; ++ ++ /* either a or b or both have been assigned */ ++ ++ if (a->join->reg.id >= 0 && b->join->reg.id >= 0) ++ return FALSE; ++ else ++ if (b->join->reg.id >= 0) { ++ if (b->join->reg.id == 63) ++ return FALSE; ++ val = a; ++ a = b; ++ b = val; ++ } else ++ if (a->join->reg.id == 63) ++ return FALSE; ++ ++ for (i = 0; i < ctx->pc->num_values; ++i) { ++ val = &ctx->pc->values[i]; ++ ++ if (val->join->reg.id != a->join->reg.id) ++ continue; ++ if (val->join != a->join && livei_have_overlap(val->join, b->join)) ++ return FALSE; ++ } ++ return TRUE; ++} ++ ++static INLINE void ++do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) ++{ ++ int j; ++ struct nv_value *bjoin = b->join; ++ ++ if (b->join->reg.id >= 0) ++ a->join->reg.id = b->join->reg.id; ++ ++ livei_unify(a->join, b->join); ++ ++#ifdef NVC0_RA_DEBUG_JOIN ++ debug_printf("joining %i to %i\n", b->n, a->n); ++#endif ++ ++ /* make a->join the new representative */ ++ for (j = 0; j < ctx->pc->num_values; ++j) ++ if (ctx->pc->values[j].join == bjoin) ++ ctx->pc->values[j].join = a->join; ++ ++ assert(b->join == a->join); ++} ++ ++static INLINE void ++try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) ++{ ++ if (!join_allowed(ctx, a, b)) { ++#ifdef NVC0_RA_DEBUG_JOIN ++ debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); ++#endif ++ return; ++ } ++ if (livei_have_overlap(a->join, b->join)) { ++#ifdef NVC0_RA_DEBUG_JOIN ++ debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n); ++ livei_print(a); ++ livei_print(b); ++#endif ++ return; ++ } ++ ++ do_join_values(ctx, a, b); ++} ++ ++static INLINE boolean ++need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) ++{ ++ int i = 0, n = 0; ++ ++ for (; i < 2; ++i) ++ if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i])) ++ ++n; ++ ++ return (b->num_in > 1) && (n == 2); ++} ++ ++static int ++phi_opnd_for_bb(struct nv_instruction 
*phi, struct nv_basic_block *b, ++ struct nv_basic_block *tb) ++{ ++ int i, j; ++ ++ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { ++ if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) ++ continue; ++ /* NOTE: back-edges are ignored by the reachable-by check */ ++ if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, ++ phi->src[i]->value->insn->bb, tb)) ++ j = i; ++ } ++ return j; ++} ++ ++/* For each operand of each PHI in b, generate a new value by inserting a MOV ++ * at the end of the block it is coming from and replace the operand with its ++ * result. This eliminates liveness conflicts and enables us to let values be ++ * copied to the right register if such a conflict exists nonetheless. ++ * ++ * These MOVs are also crucial in making sure the live intervals of phi srces ++ * are extended until the end of the loop, since they are not included in the ++ * live-in sets. ++ */ ++static int ++pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *i, *ni; ++ struct nv_value *val; ++ struct nv_basic_block *p, *pn; ++ int n, j; ++ ++ b->pass_seq = ctx->pc->pass_seq; ++ ++ for (n = 0; n < b->num_in; ++n) { ++ p = pn = b->in[n]; ++ assert(p); ++ ++ if (need_new_else_block(b, p)) { ++ pn = new_basic_block(ctx->pc); ++ ++ if (p->out[0] == b) ++ p->out[0] = pn; ++ else ++ p->out[1] = pn; ++ ++ if (p->exit->target == b) /* target to new else-block */ ++ p->exit->target = pn; ++ ++ b->in[n] = pn; ++ ++ pn->out[0] = b; ++ pn->in[0] = p; ++ pn->num_in = 1; ++ } ++ ctx->pc->current_block = pn; ++ ++ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { ++ if ((j = phi_opnd_for_bb(i, p, b)) < 0) ++ continue; ++ val = i->src[j]->value; ++ ++ if (i->src[j]->flags) { ++ /* value already encountered from a different in-block */ ++ val = val->insn->src[0]->value; ++ while (j < 6 && i->src[j]) ++ ++j; ++ assert(j < 6); ++ } ++ ++ ni = new_instruction(ctx->pc, NV_OP_MOV); ++ ++ /* TODO: insert 
instruction at correct position in the first place */ ++ if (ni->prev && ni->prev->target) ++ nvc0_insns_permute(ni->prev, ni); ++ ++ ni->def[0] = new_value_like(ctx->pc, val); ++ ni->def[0]->insn = ni; ++ nv_reference(ctx->pc, ni, 0, val); ++ nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ ++ i->src[j]->flags = 1; ++ } ++ ++ if (pn != p && pn->exit) { ++ ctx->pc->current_block = b->in[n ? 0 : 1]; ++ ni = new_instruction(ctx->pc, NV_OP_BRA); ++ ni->target = b; ++ ni->terminator = 1; ++ } ++ } ++ ++ for (j = 0; j < 2; ++j) ++ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) ++ pass_generate_phi_movs(ctx, b->out[j]); ++ ++ return 0; ++} ++ ++static int ++pass_join_values(struct nv_pc_pass *ctx, int iter) ++{ ++ int c, n; ++ ++ for (n = 0; n < ctx->num_insns; ++n) { ++ struct nv_instruction *i = ctx->insns[n]; ++ ++ switch (i->opcode) { ++ case NV_OP_PHI: ++ if (iter != 2) ++ break; ++ for (c = 0; c < 6 && i->src[c]; ++c) ++ try_join_values(ctx, i->def[0], i->src[c]->value); ++ break; ++ case NV_OP_MOV: ++ if ((iter == 2) && i->src[0]->value->insn && ++ !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) ++ try_join_values(ctx, i->def[0], i->src[0]->value); ++ break; ++ case NV_OP_SELECT: ++ if (iter != 1) ++ break; ++ for (c = 0; c < 6 && i->src[c]; ++c) { ++ assert(join_allowed(ctx, i->def[0], i->src[c]->value)); ++ do_join_values(ctx, i->def[0], i->src[c]->value); ++ } ++ break; ++ case NV_OP_TEX: ++ case NV_OP_TXB: ++ case NV_OP_TXL: ++ case NV_OP_TXQ: ++ /* on nvc0, TEX src and dst can differ */ ++ break; ++ case NV_OP_BIND: ++ if (iter) ++ break; ++ for (c = 0; c < 6 && i->src[c]; ++c) ++ do_join_values(ctx, i->def[c], i->src[c]->value); ++ break; ++ default: ++ break; ++ } ++ } ++ return 0; ++} ++ ++/* Order the instructions so that live intervals can be expressed in numbers. 
*/ ++static void ++pass_order_instructions(void *priv, struct nv_basic_block *b) ++{ ++ struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; ++ struct nv_instruction *i; ++ ++ b->pass_seq = ctx->pc->pass_seq; ++ ++ assert(!b->exit || !b->exit->next); ++ for (i = b->phi; i; i = i->next) { ++ i->serial = ctx->num_insns; ++ ctx->insns[ctx->num_insns++] = i; ++ } ++} ++ ++static void ++bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b) ++{ ++#ifdef NVC0_RA_DEBUG_LIVE_SETS ++ struct nv_value *val; ++ int j; ++ ++ debug_printf("LIVE-INs of BB:%i: ", b->id); ++ ++ for (j = 0; j < pc->num_values; ++j) { ++ if (!(b->live_set[j / 32] & (1 << (j % 32)))) ++ continue; ++ val = &pc->values[j]; ++ if (!val->insn) ++ continue; ++ debug_printf("%i ", val->n); ++ } ++ debug_printf("\n"); ++#endif ++} ++ ++static INLINE void ++live_set_add(struct nv_basic_block *b, struct nv_value *val) ++{ ++ if (!val->insn) /* don't add non-def values */ ++ return; ++ b->live_set[val->n / 32] |= 1 << (val->n % 32); ++} ++ ++static INLINE void ++live_set_rem(struct nv_basic_block *b, struct nv_value *val) ++{ ++ b->live_set[val->n / 32] &= ~(1 << (val->n % 32)); ++} ++ ++static INLINE boolean ++live_set_test(struct nv_basic_block *b, struct nv_ref *ref) ++{ ++ int n = ref->value->n; ++ return b->live_set[n / 32] & (1 << (n % 32)); ++} ++ ++/* The live set of a block contains those values that are live immediately ++ * before the beginning of the block, so do a backwards scan. 
++ */ ++static int ++pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *i; ++ int j, n, ret = 0; ++ ++ if (b->pass_seq >= ctx->pc->pass_seq) ++ return 0; ++ b->pass_seq = ctx->pc->pass_seq; ++ ++ /* slight hack for undecidedness: set phi = entry if it's undefined */ ++ if (!b->phi) ++ b->phi = b->entry; ++ ++ for (n = 0; n < 2; ++n) { ++ if (!b->out[n] || b->out[n] == b) ++ continue; ++ ret = pass_build_live_sets(ctx, b->out[n]); ++ if (ret) ++ return ret; ++ ++ if (n == 0) { ++ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) ++ b->live_set[j] = b->out[n]->live_set[j]; ++ } else { ++ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) ++ b->live_set[j] |= b->out[n]->live_set[j]; ++ } ++ } ++ ++ if (!b->entry) ++ return 0; ++ ++ bb_live_set_print(ctx->pc, b); ++ ++ for (i = b->exit; i != b->entry->prev; i = i->prev) { ++ for (j = 0; j < 5 && i->def[j]; j++) ++ live_set_rem(b, i->def[j]); ++ for (j = 0; j < 6 && i->src[j]; j++) ++ live_set_add(b, i->src[j]->value); ++ } ++ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) ++ live_set_rem(b, i->def[0]); ++ ++ bb_live_set_print(ctx->pc, b); ++ ++ return 0; ++} ++ ++static void collect_live_values(struct nv_basic_block *b, const int n) ++{ ++ int i; ++ ++ if (b->out[0]) { ++ if (b->out[1]) { /* what to do about back-edges ? */ ++ for (i = 0; i < n; ++i) ++ b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; ++ } else { ++ memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); ++ } ++ } else ++ if (b->out[1]) { ++ memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); ++ } else { ++ memset(b->live_set, 0, n * sizeof(uint32_t)); ++ } ++} ++ ++/* NOTE: the live intervals of phi functions start at the first non-phi insn. 
*/ ++static int ++pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) ++{ ++ struct nv_instruction *i, *i_stop; ++ int j, s; ++ const int n = (ctx->pc->num_values + 31) / 32; ++ ++ /* verify that first block does not have live-in values */ ++ if (b->num_in == 0) ++ for (j = 0; j < n; ++j) ++ assert(b->live_set[j] == 0); ++ ++ collect_live_values(b, n); ++ ++ /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */ ++ for (j = 0; j < 2; ++j) { ++ if (!b->out[j] || !b->out[j]->phi) ++ continue; ++ for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) { ++ live_set_rem(b, i->def[0]); ++ ++ for (s = 0; s < 6 && i->src[s]; ++s) { ++ assert(i->src[s]->value->insn); ++ if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb, ++ b->out[j])) ++ live_set_add(b, i->src[s]->value); ++ else ++ live_set_rem(b, i->src[s]->value); ++ } ++ } ++ } ++ ++ /* remaining live-outs are live until the end */ ++ if (b->exit) { ++ for (j = 0; j < ctx->pc->num_values; ++j) { ++ if (!(b->live_set[j / 32] & (1 << (j % 32)))) ++ continue; ++ add_range(&ctx->pc->values[j], b, b->exit->serial + 1); ++#ifdef NVC0_RA_DEBUG_LIVEI ++ debug_printf("adding range for live value %i: ", j); ++ livei_print(&ctx->pc->values[j]); ++#endif ++ } ++ } ++ ++ i_stop = b->entry ? 
b->entry->prev : NULL; ++ ++ /* don't have to include phi functions here (will have 0 live range) */ ++ for (i = b->exit; i != i_stop; i = i->prev) { ++ assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial); ++ for (j = 0; j < 4 && i->def[j]; ++j) ++ live_set_rem(b, i->def[j]); ++ ++ for (j = 0; j < 6 && i->src[j]; ++j) { ++ if (!live_set_test(b, i->src[j])) { ++ live_set_add(b, i->src[j]->value); ++ add_range(i->src[j]->value, b, i->serial); ++#ifdef NVC0_RA_DEBUG_LIVEI ++ debug_printf("adding range for source %i (ends living): ", ++ i->src[j]->value->n); ++ livei_print(i->src[j]->value); ++#endif ++ } ++ } ++ } ++ ++ b->pass_seq = ctx->pc->pass_seq; ++ ++ if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) ++ pass_build_intervals(ctx, b->out[0]); ++ ++ if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) ++ pass_build_intervals(ctx, b->out[1]); ++ ++ return 0; ++} ++ ++static INLINE void ++nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) ++{ ++ memset(set, 0, sizeof(*set)); ++ ++ set->last[NV_FILE_GPR] = 62; ++ set->last[NV_FILE_PRED] = 6; ++ set->last[NV_FILE_COND] = 1; ++ ++ set->log2_unit[NV_FILE_GPR] = 2; ++ set->log2_unit[NV_FILE_COND] = 0; ++ set->log2_unit[NV_FILE_PRED] = 0; ++ ++ set->pc = pc; ++} ++ ++static void ++insert_ordered_tail(struct nv_value *list, struct nv_value *nval) ++{ ++ struct nv_value *elem; ++ ++ for (elem = list->prev; ++ elem != list && elem->livei->bgn > nval->livei->bgn; ++ elem = elem->prev); ++ /* now elem begins before or at the same time as val */ ++ ++ nval->prev = elem; ++ nval->next = elem->next; ++ elem->next->prev = nval; ++ elem->next = nval; ++} ++ ++static int ++pass_linear_scan(struct nv_pc_pass *ctx, int iter) ++{ ++ struct nv_instruction *i; ++ struct register_set f, free; ++ int k, n; ++ struct nv_value *cur, *val, *tmp[2]; ++ struct nv_value active, inactive, handled, unhandled; ++ ++ make_empty_list(&active); ++ make_empty_list(&inactive); ++ make_empty_list(&handled); ++ 
make_empty_list(&unhandled); ++ ++ nvc0_ctor_register_set(ctx->pc, &free); ++ ++ /* joined values should have range = NULL and thus not be added; ++ * also, fixed memory values won't be added because they're not ++ * def'd, just used ++ */ ++ for (n = 0; n < ctx->num_insns; ++n) { ++ i = ctx->insns[n]; ++ ++ for (k = 0; k < 5; ++k) { ++ if (i->def[k] && i->def[k]->livei) ++ insert_ordered_tail(&unhandled, i->def[k]); ++ else ++ if (0 && i->def[k]) ++ debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); ++ } ++ } ++ ++ for (val = unhandled.next; val != unhandled.prev; val = val->next) { ++ assert(val->join == val); ++ assert(val->livei->bgn <= val->next->livei->bgn); ++ } ++ ++ foreach_s(cur, tmp[0], &unhandled) { ++ remove_from_list(cur); ++ ++ foreach_s(val, tmp[1], &active) { ++ if (livei_end(val) <= cur->livei->bgn) { ++ reg_release(&free, val); ++ move_to_head(&handled, val); ++ } else ++ if (!livei_contains(val, cur->livei->bgn)) { ++ reg_release(&free, val); ++ move_to_head(&inactive, val); ++ } ++ } ++ ++ foreach_s(val, tmp[1], &inactive) { ++ if (livei_end(val) <= cur->livei->bgn) ++ move_to_head(&handled, val); ++ else ++ if (livei_contains(val, cur->livei->bgn)) { ++ reg_occupy(&free, val); ++ move_to_head(&active, val); ++ } ++ } ++ ++ f = free; ++ ++ foreach(val, &inactive) ++ if (livei_have_overlap(val, cur)) ++ reg_occupy(&f, val); ++ ++ foreach(val, &unhandled) ++ if (val->reg.id >= 0 && livei_have_overlap(val, cur)) ++ reg_occupy(&f, val); ++ ++ if (cur->reg.id < 0) { ++ boolean mem = FALSE; ++ int v = nvi_vector_size(cur->insn); ++ ++ if (v > 1) ++ mem = !reg_assign(&f, &cur->insn->def[0], v); ++ else ++ if (iter) ++ mem = !reg_assign(&f, &cur, 1); ++ ++ if (mem) { ++ NOUVEAU_ERR("out of registers\n"); ++ abort(); ++ } ++ } ++ insert_at_head(&active, cur); ++ reg_occupy(&free, cur); ++ } ++ ++ return 0; ++} ++ ++static int ++nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) ++{ ++ struct nv_pc_pass *ctx; ++ int i, ret; ++ ++ 
NOUVEAU_DBG("REGISTER ALLOCATION - entering\n"); ++ ++ ctx = CALLOC_STRUCT(nv_pc_pass); ++ if (!ctx) ++ return -1; ++ ctx->pc = pc; ++ ++ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); ++ if (!ctx->insns) { ++ FREE(ctx); ++ return -1; ++ } ++ ++ pc->pass_seq++; ++ ret = pass_generate_phi_movs(ctx, root); ++ assert(!ret); ++ ++ for (i = 0; i < pc->loop_nesting_bound; ++i) { ++ pc->pass_seq++; ++ ret = pass_build_live_sets(ctx, root); ++ assert(!ret && "live sets"); ++ if (ret) { ++ NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); ++ goto out; ++ } ++ } ++ ++ pc->pass_seq++; ++ nvc0_pc_pass_in_order(root, pass_order_instructions, ctx); ++ ++ pc->pass_seq++; ++ ret = pass_build_intervals(ctx, root); ++ assert(!ret && "build intervals"); ++ if (ret) { ++ NOUVEAU_ERR("failed to build live intervals\n"); ++ goto out; ++ } ++ ++#ifdef NVC0_RA_DEBUG_LIVEI ++ for (i = 0; i < pc->num_values; ++i) ++ livei_print(&pc->values[i]); ++#endif ++ ++ ret = pass_join_values(ctx, 0); ++ if (ret) ++ goto out; ++ ret = pass_linear_scan(ctx, 0); ++ if (ret) ++ goto out; ++ ret = pass_join_values(ctx, 1); ++ if (ret) ++ goto out; ++ ret = pass_join_values(ctx, 2); ++ if (ret) ++ goto out; ++ ret = pass_linear_scan(ctx, 1); ++ if (ret) ++ goto out; ++ ++ for (i = 0; i < pc->num_values; ++i) ++ livei_release(&pc->values[i]); ++ ++ NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n"); ++ ++out: ++ FREE(ctx->insns); ++ FREE(ctx); ++ return ret; ++} ++ ++int ++nvc0_pc_exec_pass1(struct nv_pc *pc) ++{ ++ int i, ret; ++ ++ for (i = 0; i < pc->num_subroutines + 1; ++i) ++ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) ++ return ret; ++ return 0; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c +new file mode 100644 +index 0000000..aefaf7b +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_program.c +@@ -0,0 +1,694 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby 
granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "pipe/p_shader_tokens.h" ++#include "pipe/p_defines.h" ++ ++#define NOUVEAU_DEBUG ++ ++#include "tgsi/tgsi_parse.h" ++#include "tgsi/tgsi_util.h" ++#include "tgsi/tgsi_dump.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_pc.h" ++ ++static unsigned ++nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) ++{ ++ unsigned mask = inst->Dst[0].Register.WriteMask; ++ ++ switch (inst->Instruction.Opcode) { ++ case TGSI_OPCODE_COS: ++ case TGSI_OPCODE_SIN: ++ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); ++ case TGSI_OPCODE_DP3: ++ return 0x7; ++ case TGSI_OPCODE_DP4: ++ case TGSI_OPCODE_DPH: ++ case TGSI_OPCODE_KIL: /* WriteMask ignored */ ++ return 0xf; ++ case TGSI_OPCODE_DST: ++ return mask & (c ? 
0xa : 0x6); ++ case TGSI_OPCODE_EX2: ++ case TGSI_OPCODE_EXP: ++ case TGSI_OPCODE_LG2: ++ case TGSI_OPCODE_LOG: ++ case TGSI_OPCODE_POW: ++ case TGSI_OPCODE_RCP: ++ case TGSI_OPCODE_RSQ: ++ case TGSI_OPCODE_SCS: ++ return 0x1; ++ case TGSI_OPCODE_IF: ++ return 0x1; ++ case TGSI_OPCODE_LIT: ++ return 0xb; ++ case TGSI_OPCODE_TEX: ++ case TGSI_OPCODE_TXB: ++ case TGSI_OPCODE_TXL: ++ case TGSI_OPCODE_TXP: ++ { ++ const struct tgsi_instruction_texture *tex; ++ ++ assert(inst->Instruction.Texture); ++ tex = &inst->Texture; ++ ++ mask = 0x7; ++ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && ++ inst->Instruction.Opcode != TGSI_OPCODE_TXD) ++ mask |= 0x8; /* bias, lod or proj */ ++ ++ switch (tex->Texture) { ++ case TGSI_TEXTURE_1D: ++ mask &= 0x9; ++ break; ++ case TGSI_TEXTURE_SHADOW1D: ++ mask &= 0x5; ++ break; ++ case TGSI_TEXTURE_2D: ++ mask &= 0xb; ++ break; ++ default: ++ break; ++ } ++ } ++ return mask; ++ case TGSI_OPCODE_XPD: ++ { ++ unsigned x = 0; ++ if (mask & 1) x |= 0x6; ++ if (mask & 2) x |= 0x5; ++ if (mask & 4) x |= 0x3; ++ return x; ++ } ++ default: ++ break; ++ } ++ ++ return mask; ++} ++ ++static void ++nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) ++{ ++ int i, c; ++ ++ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) ++ for (c = 0; c < 4; ++c) ++ ti->input_access[i][c] = id; ++ ++ ti->indirect_inputs = TRUE; ++} ++ ++static void ++nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) ++{ ++ int i, c; ++ ++ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) ++ for (c = 0; c < 4; ++c) ++ ti->output_access[i][c] = id; ++ ++ ti->indirect_outputs = TRUE; ++} ++ ++static INLINE unsigned ++nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input) ++{ ++ /* NOTE: locations 0xfxx indicate special regs */ ++ switch (sn) { ++ /* ++ case TGSI_SEMANTIC_VERTEXID: ++ *is_input = TRUE; ++ return 0x2fc; ++ */ ++ case TGSI_SEMANTIC_PRIMID: ++ *is_input = TRUE; ++ return 0x60; ++ /* ++ case TGSI_SEMANTIC_LAYER_INDEX: ++ return 0x64; 
++ case TGSI_SEMANTIC_VIEWPORT_INDEX: ++ return 0x68; ++ */ ++ case TGSI_SEMANTIC_INSTANCEID: ++ *is_input = TRUE; ++ return 0x2f8; ++ case TGSI_SEMANTIC_FACE: ++ *is_input = TRUE; ++ return 0x3fc; ++ /* ++ case TGSI_SEMANTIC_INVOCATIONID: ++ return 0xf11; ++ */ ++ default: ++ assert(0); ++ return 0x000; ++ } ++} ++ ++static INLINE unsigned ++nvc0_varying_location(unsigned sn, unsigned si) ++{ ++ switch (sn) { ++ case TGSI_SEMANTIC_POSITION: ++ return 0x70; ++ case TGSI_SEMANTIC_COLOR: ++ return 0x280 + (si * 16); /* are these hard-wired ? */ ++ case TGSI_SEMANTIC_BCOLOR: ++ return 0x2a0 + (si * 16); ++ case TGSI_SEMANTIC_FOG: ++ return 0x270; ++ case TGSI_SEMANTIC_PSIZE: ++ return 0x6c; ++ /* ++ case TGSI_SEMANTIC_PNTC: ++ return 0x2e0; ++ */ ++ case TGSI_SEMANTIC_GENERIC: ++ assert(si < 31); ++ return 0x80 + (si * 16); ++ case TGSI_SEMANTIC_NORMAL: ++ return 0x360; ++ case TGSI_SEMANTIC_PRIMID: ++ return 0x40; ++ case TGSI_SEMANTIC_FACE: ++ return 0x3fc; ++ /* ++ case TGSI_SEMANTIC_CLIP_DISTANCE: ++ return 0x2c0 + (si * 4); ++ */ ++ default: ++ assert(0); ++ return 0x000; ++ } ++} ++ ++static INLINE unsigned ++nvc0_interp_mode(const struct tgsi_full_declaration *decl) ++{ ++ unsigned mode; ++ ++ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) ++ mode = NVC0_INTERP_FLAT; ++ else ++ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) ++ mode = NVC0_INTERP_PERSPECTIVE; ++ else ++ mode = NVC0_INTERP_LINEAR; ++ ++ if (decl->Declaration.Centroid) ++ mode |= NVC0_INTERP_CENTROID; ++ ++ return mode; ++} ++ ++static void ++prog_immediate(struct nvc0_translation_info *ti, ++ const struct tgsi_full_immediate *imm) ++{ ++ int c; ++ unsigned n = ti->immd32_nr++; ++ ++ assert(ti->immd32_nr <= ti->scan.immediate_count); ++ ++ for (c = 0; c < 4; ++c) ++ ti->immd32[n * 4 + c] = imm->u[c].Uint; ++ ++ ti->immd32_ty[n] = imm->Immediate.DataType; ++} ++ ++static boolean ++prog_decl(struct nvc0_translation_info *ti, ++ const struct tgsi_full_declaration 
*decl) ++{ ++ unsigned i, c; ++ unsigned sn = TGSI_SEMANTIC_GENERIC; ++ unsigned si = 0; ++ const unsigned first = decl->Range.First; ++ const unsigned last = decl->Range.Last; ++ ++ if (decl->Declaration.Semantic) { ++ sn = decl->Semantic.Name; ++ si = decl->Semantic.Index; ++ } ++ ++ switch (decl->Declaration.File) { ++ case TGSI_FILE_INPUT: ++ for (i = first; i <= last; ++i) { ++ if (ti->prog->type == PIPE_SHADER_VERTEX) { ++ sn = TGSI_SEMANTIC_GENERIC; ++ si = i; ++ } ++ for (c = 0; c < 4; ++c) ++ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; ++ ++ if (ti->prog->type == PIPE_SHADER_FRAGMENT) ++ ti->interp_mode[i] = nvc0_interp_mode(decl); ++ } ++ break; ++ case TGSI_FILE_OUTPUT: ++ for (i = first; i <= last; ++i, ++si) { ++ if (ti->prog->type == PIPE_SHADER_FRAGMENT) { ++ si = i; ++ if (i == ti->fp_depth_output) { ++ ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; ++ } else { ++ if (i > ti->fp_depth_output) ++ si -= 1; ++ for (c = 0; c < 4; ++c) ++ ti->output_loc[i][c] = si * 4 + c; ++ } ++ } else { ++ for (c = 0; c < 4; ++c) ++ ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; ++ } ++ } ++ break; ++ case TGSI_FILE_SYSTEM_VALUE: ++ ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); ++ assert(first == last); ++ break; ++ case TGSI_FILE_NULL: ++ case TGSI_FILE_CONSTANT: ++ case TGSI_FILE_TEMPORARY: ++ case TGSI_FILE_SAMPLER: ++ case TGSI_FILE_ADDRESS: ++ case TGSI_FILE_IMMEDIATE: ++ case TGSI_FILE_PREDICATE: ++ break; ++ default: ++ NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); ++ return FALSE; ++ } ++ return TRUE; ++} ++ ++static void ++prog_inst(struct nvc0_translation_info *ti, ++ const struct tgsi_full_instruction *inst, int id) ++{ ++ const struct tgsi_dst_register *dst; ++ const struct tgsi_src_register *src; ++ int s, c, k; ++ unsigned mask; ++ ++ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { ++ ti->subr[ti->num_subrs].first_insn = id - 1; ++ ti->subr[ti->num_subrs].id = 
ti->num_subrs + 1; /* id 0 is main program */ ++ ++ti->num_subrs; ++ } ++ ++ if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { ++ dst = &inst->Dst[0].Register; ++ ++ for (c = 0; c < 4; ++c) { ++ if (dst->Indirect) ++ nvc0_indirect_outputs(ti, id); ++ if (!(dst->WriteMask & (1 << c))) ++ continue; ++ ti->output_access[dst->Index][c] = id; ++ } ++ ++ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && ++ inst->Src[0].Register.File == TGSI_FILE_INPUT && ++ dst->Index == ti->edgeflag_out) ++ ti->prog->vp.edgeflag = inst->Src[0].Register.Index; ++ } else ++ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { ++ if (inst->Dst[0].Register.Indirect) ++ ti->require_stores = TRUE; ++ } ++ ++ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { ++ src = &inst->Src[s].Register; ++ if (src->File == TGSI_FILE_TEMPORARY) ++ if (inst->Src[s].Register.Indirect) ++ ti->require_stores = TRUE; ++ if (src->File != TGSI_FILE_INPUT) ++ continue; ++ mask = nvc0_tgsi_src_mask(inst, s); ++ ++ if (inst->Src[s].Register.Indirect) ++ nvc0_indirect_inputs(ti, id); ++ ++ for (c = 0; c < 4; ++c) { ++ if (!(mask & (1 << c))) ++ continue; ++ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); ++ if (k <= TGSI_SWIZZLE_W) ++ ti->input_access[src->Index][k] = id; ++ } ++ } ++} ++ ++/* Probably should introduce something like struct tgsi_function_declaration ++ * instead of trying to guess inputs/outputs. 
++ */ ++static void ++prog_subroutine_inst(struct nvc0_subroutine *subr, ++ const struct tgsi_full_instruction *inst) ++{ ++ const struct tgsi_dst_register *dst; ++ const struct tgsi_src_register *src; ++ int s, c, k; ++ unsigned mask; ++ ++ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { ++ src = &inst->Src[s].Register; ++ if (src->File != TGSI_FILE_TEMPORARY) ++ continue; ++ mask = nvc0_tgsi_src_mask(inst, s); ++ ++ for (c = 0; c < 4; ++c) { ++ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); ++ ++ if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) ++ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) ++ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); ++ } ++ } ++ ++ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { ++ dst = &inst->Dst[0].Register; ++ ++ for (c = 0; c < 4; ++c) ++ if (dst->WriteMask & (1 << c)) ++ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); ++ } ++} ++ ++static int ++nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) ++{ ++ int i, c; ++ unsigned a; ++ ++ for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { ++ for (c = 0; c < 4; ++c, ++a) ++ if (ti->input_access[i][c]) ++ vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */ ++ } ++ ++ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { ++ a = (ti->output_loc[i][0] - 0x40) / 4; ++ for (c = 0; c < 4; ++c, ++a) { ++ if (!ti->output_access[i][c]) ++ continue; ++ vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */ ++ } ++ } ++ ++ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { ++ a = ti->sysval_loc[i] / 4; ++ if (a > 0 && a < (0xf00 / 4)) ++ vp->hdr[(ti->sysval_in[i] ? 
5 : 13) + a / 32] |= 1 << (a % 32); ++ } ++ ++ return 0; ++} ++ ++static int ++nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) ++{ ++ vp->hdr[0] = 0x20461; ++ vp->hdr[4] = 0xff000; ++ ++ vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; ++ ++ return nvc0_vp_gp_gen_header(vp, ti); ++} ++ ++static int ++nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) ++{ ++ unsigned invocations = 1; ++ unsigned max_output_verts, output_prim; ++ unsigned i; ++ ++ gp->hdr[0] = 0x21061; ++ ++ for (i = 0; i < ti->scan.num_properties; ++i) { ++ switch (ti->scan.properties[i].name) { ++ case TGSI_PROPERTY_GS_OUTPUT_PRIM: ++ output_prim = ti->scan.properties[i].data[0]; ++ break; ++ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: ++ max_output_verts = ti->scan.properties[i].data[0]; ++ assert(max_output_verts < 512); ++ break; ++ /* ++ case TGSI_PROPERTY_GS_INVOCATIONS: ++ invocations = ti->scan.properties[i].data[0]; ++ assert(invocations <= 32); ++ break; ++ */ ++ default: ++ break; ++ } ++ } ++ ++ gp->hdr[2] = MIN2(invocations, 32) << 24; ++ ++ switch (output_prim) { ++ case PIPE_PRIM_POINTS: ++ gp->hdr[3] = 0x01000000; ++ gp->hdr[0] |= 0xf0000000; ++ break; ++ case PIPE_PRIM_LINE_STRIP: ++ gp->hdr[3] = 0x06000000; ++ gp->hdr[0] |= 0x10000000; ++ break; ++ case PIPE_PRIM_TRIANGLE_STRIP: ++ gp->hdr[3] = 0x07000000; ++ gp->hdr[0] |= 0x10000000; ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ ++ gp->hdr[4] = max_output_verts & 0x1ff; ++ ++ return nvc0_vp_gp_gen_header(gp, ti); ++} ++ ++static int ++nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) ++{ ++ int i, c; ++ unsigned a, m; ++ ++ fp->hdr[0] = 0x21462; ++ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ ++ ++ if (ti->scan.uses_kill) ++ fp->hdr[0] |= 0x8000; ++ if (ti->scan.writes_z) { ++ fp->hdr[19] |= 0x2; ++ if (ti->scan.num_outputs > 2) ++ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ ++ } else { ++ if 
(ti->scan.num_outputs > 1) ++ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ ++ } ++ ++ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { ++ m = ti->interp_mode[i]; ++ for (c = 0; c < 4; ++c) { ++ if (!ti->input_access[i][c]) ++ continue; ++ a = ti->input_loc[i][c] / 2; ++ if ((a & ~7) == 0x70/2) ++ fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ ++ else ++ fp->hdr[4 + a / 32] |= m << (a % 32); ++ } ++ } ++ ++ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { ++ if (i != ti->fp_depth_output) ++ fp->hdr[18] |= 0xf << ti->output_loc[i][0]; ++ } ++ ++ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { ++ a = ti->sysval_loc[i] / 2; ++ if ((a > 0) && (a < 0xf00 / 2)) ++ fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32); ++ } ++ ++ return 0; ++} ++ ++static boolean ++nvc0_prog_scan(struct nvc0_translation_info *ti) ++{ ++ struct nvc0_program *prog = ti->prog; ++ struct tgsi_parse_context parse; ++ int ret; ++ unsigned i; ++ ++#ifdef NOUVEAU_DEBUG ++ tgsi_dump(prog->pipe.tokens, 0); ++#endif ++ ++ tgsi_scan_shader(prog->pipe.tokens, &ti->scan); ++ ++ if (ti->prog->type == PIPE_SHADER_FRAGMENT) { ++ ti->fp_depth_output = 255; ++ for (i = 0; i < ti->scan.num_outputs; ++i) ++ if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) ++ ti->fp_depth_output = i; ++ } ++ ++ ti->subr = ++ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); ++ ++ ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); ++ ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); ++ ++ ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); ++ ++ tgsi_parse_init(&parse, prog->pipe.tokens); ++ while (!tgsi_parse_end_of_tokens(&parse)) { ++ tgsi_parse_token(&parse); ++ ++ switch (parse.FullToken.Token.Type) { ++ case TGSI_TOKEN_TYPE_IMMEDIATE: ++ prog_immediate(ti, &parse.FullToken.FullImmediate); ++ break; ++ case TGSI_TOKEN_TYPE_DECLARATION: ++ prog_decl(ti, &parse.FullToken.FullDeclaration); ++ 
break; ++ case TGSI_TOKEN_TYPE_INSTRUCTION: ++ ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; ++ prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ for (i = 0; i < ti->num_subrs; ++i) { ++ unsigned pc = ti->subr[i].id; ++ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) ++ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); ++ } ++ ++ switch (prog->type) { ++ case PIPE_SHADER_VERTEX: ++ ti->input_file = NV_FILE_MEM_A; ++ ti->output_file = NV_FILE_MEM_V; ++ ret = nvc0_vp_gen_header(prog, ti); ++ break; ++ /* ++ case PIPE_SHADER_TESSELLATION_CONTROL: ++ ret = nvc0_tcp_gen_header(ti); ++ break; ++ case PIPE_SHADER_TESSELLATION_EVALUATION: ++ ret = nvc0_tep_gen_header(ti); ++ break; ++ case PIPE_SHADER_GEOMETRY: ++ ret = nvc0_gp_gen_header(ti); ++ break; ++ */ ++ case PIPE_SHADER_FRAGMENT: ++ ti->input_file = NV_FILE_MEM_V; ++ ti->output_file = NV_FILE_GPR; ++ ++ if (ti->scan.writes_z) ++ prog->flags[0] = 0x11; /* ? 
*/ ++ else ++ if (!ti->global_stores) ++ prog->fp.early_z = 1; ++ ++ ret = nvc0_fp_gen_header(prog, ti); ++ break; ++ default: ++ assert(!"unsupported program type"); ++ ret = -1; ++ break; ++ } ++ ++ assert(!ret); ++ return ret; ++} ++ ++boolean ++nvc0_program_translate(struct nvc0_program *prog) ++{ ++ struct nvc0_translation_info *ti; ++ int ret; ++ ++ ti = CALLOC_STRUCT(nvc0_translation_info); ++ ti->prog = prog; ++ ++ ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; ++ ++ if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) ++ ti->append_ucp = TRUE; ++ ++ ret = nvc0_prog_scan(ti); ++ if (ret) { ++ NOUVEAU_ERR("unsupported shader program\n"); ++ goto out; ++ } ++ ++ ret = nvc0_generate_code(ti); ++ if (ret) ++ NOUVEAU_ERR("shader translation failed\n"); ++ ++ { ++ unsigned i; ++ for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) ++ debug_printf("HDR[%02lx] = 0x%08x\n", ++ i * sizeof(prog->hdr[0]), prog->hdr[i]); ++ } ++ ++out: ++ if (ti->immd32) ++ FREE(ti->immd32); ++ if (ti->immd32_ty) ++ FREE(ti->immd32_ty); ++ if (ti->insns) ++ FREE(ti->insns); ++ if (ti->subr) ++ FREE(ti->subr); ++ FREE(ti); ++ return ret ? 
FALSE : TRUE; ++} ++ ++void ++nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) ++{ ++ if (prog->res) ++ nouveau_resource_free(&prog->res); ++ ++ if (prog->code) ++ FREE(prog->code); ++ if (prog->relocs) ++ FREE(prog->relocs); ++ ++ memset(prog->hdr, 0, sizeof(prog->hdr)); ++ ++ prog->translated = FALSE; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h +new file mode 100644 +index 0000000..e6b210d +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_program.h +@@ -0,0 +1,89 @@ ++ ++#ifndef __NVC0_PROGRAM_H__ ++#define __NVC0_PROGRAM_H__ ++ ++#include "pipe/p_state.h" ++#include "tgsi/tgsi_scan.h" ++ ++#define NVC0_CAP_MAX_PROGRAM_TEMPS 64 ++ ++#define NVC0_SHADER_HEADER_SIZE (20 * 4) ++ ++struct nvc0_program { ++ struct pipe_shader_state pipe; ++ ++ ubyte type; ++ boolean translated; ++ ubyte max_gpr; ++ ++ uint32_t *code; ++ unsigned code_base; ++ unsigned code_size; ++ unsigned parm_size; ++ ++ uint32_t hdr[20]; ++ ++ uint32_t flags[2]; ++ ++ struct { ++ uint8_t edgeflag; ++ uint8_t num_ucps; ++ } vp; ++ struct { ++ uint8_t early_z; ++ } fp; ++ ++ void *relocs; ++ unsigned num_relocs; ++ ++ struct nouveau_resource *res; ++}; ++ ++/* first 2 bits are written into the program header, for each input */ ++#define NVC0_INTERP_FLAT (1 << 0) ++#define NVC0_INTERP_PERSPECTIVE (2 << 0) ++#define NVC0_INTERP_LINEAR (3 << 0) ++#define NVC0_INTERP_CENTROID (1 << 2) ++ ++/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ ++struct nvc0_subroutine { ++ unsigned id; ++ unsigned first_insn; ++ uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; ++ uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; ++}; ++ ++struct nvc0_translation_info { ++ struct nvc0_program *prog; ++ struct tgsi_full_instruction *insns; ++ unsigned num_insns; ++ ubyte input_file; ++ ubyte output_file; ++ ubyte fp_depth_output; ++ uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; ++ uint16_t 
output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; ++ uint16_t sysval_loc[TGSI_SEMANTIC_COUNT]; ++ boolean sysval_in[TGSI_SEMANTIC_COUNT]; ++ int input_access[PIPE_MAX_SHADER_INPUTS][4]; ++ int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; ++ ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; ++ boolean indirect_inputs; ++ boolean indirect_outputs; ++ boolean require_stores; ++ boolean global_stores; ++ uint32_t *immd32; ++ ubyte *immd32_ty; ++ unsigned immd32_nr; ++ ubyte edgeflag_out; ++ struct nvc0_subroutine *subr; ++ unsigned num_subrs; ++ boolean append_ucp; ++ struct tgsi_shader_info scan; ++}; ++ ++int nvc0_generate_code(struct nvc0_translation_info *); ++ ++void nvc0_relocate_program(struct nvc0_program *, ++ uint32_t code_base, uint32_t data_base); ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c +new file mode 100644 +index 0000000..74c3451 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_push.c +@@ -0,0 +1,289 @@ ++ ++#include "pipe/p_context.h" ++#include "pipe/p_state.h" ++#include "util/u_inlines.h" ++#include "util/u_format.h" ++#include "translate/translate.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++ ++#include "nvc0_3d.xml.h" ++ ++struct push_context { ++ struct nouveau_channel *chan; ++ ++ void *idxbuf; ++ ++ float edgeflag; ++ int edgeflag_attr; ++ ++ uint32_t vertex_words; ++ uint32_t packet_vertex_limit; ++ ++ struct translate *translate; ++ ++ boolean primitive_restart; ++ uint32_t prim; ++ uint32_t restart_index; ++ uint32_t instance_id; ++}; ++ ++static INLINE unsigned ++prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) ++{ ++ unsigned i; ++ for (i = 0; i < push; ++i) ++ if (elts[i] == index) ++ break; ++ return i; ++} ++ ++static INLINE unsigned ++prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) ++{ ++ unsigned i; ++ for (i = 0; i < push; ++i) ++ if (elts[i] == index) ++ break; ++ return i; ++} ++ ++static INLINE unsigned 
++prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) ++{ ++ unsigned i; ++ for (i = 0; i < push; ++i) ++ if (elts[i] == index) ++ break; ++ return i; ++} ++ ++static void ++emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ uint8_t *elts = (uint8_t *)ctx->idxbuf + start; ++ ++ while (count) { ++ unsigned push = MIN2(count, ctx->packet_vertex_limit); ++ unsigned size, nr; ++ ++ nr = push; ++ if (ctx->primitive_restart) ++ nr = prim_restart_search_i08(elts, push, ctx->restart_index); ++ ++ size = ctx->vertex_words * nr; ++ ++ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); ++ ++ ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id, ++ ctx->chan->cur); ++ ++ ctx->chan->cur += size; ++ count -= nr; ++ elts += nr; ++ ++ if (nr != push) { ++ count--; ++ elts++; ++ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); ++ OUT_RING (ctx->chan, 0); ++ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | ++ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); ++ } ++ } ++} ++ ++static void ++emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ uint16_t *elts = (uint16_t *)ctx->idxbuf + start; ++ ++ while (count) { ++ unsigned push = MIN2(count, ctx->packet_vertex_limit); ++ unsigned size, nr; ++ ++ nr = push; ++ if (ctx->primitive_restart) ++ nr = prim_restart_search_i16(elts, push, ctx->restart_index); ++ ++ size = ctx->vertex_words * nr; ++ ++ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); ++ ++ ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id, ++ ctx->chan->cur); ++ ++ ctx->chan->cur += size; ++ count -= nr; ++ elts += nr; ++ ++ if (nr != push) { ++ count--; ++ elts++; ++ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); ++ OUT_RING (ctx->chan, 0); ++ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | ++ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); ++ } ++ } ++} ++ ++static void ++emit_vertices_i32(struct push_context 
*ctx, unsigned start, unsigned count) ++{ ++ uint32_t *elts = (uint32_t *)ctx->idxbuf + start; ++ ++ while (count) { ++ unsigned push = MIN2(count, ctx->packet_vertex_limit); ++ unsigned size, nr; ++ ++ nr = push; ++ if (ctx->primitive_restart) ++ nr = prim_restart_search_i32(elts, push, ctx->restart_index); ++ ++ size = ctx->vertex_words * nr; ++ ++ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); ++ ++ ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id, ++ ctx->chan->cur); ++ ++ ctx->chan->cur += size; ++ count -= nr; ++ elts += nr; ++ ++ if (nr != push) { ++ count--; ++ elts++; ++ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); ++ OUT_RING (ctx->chan, 0); ++ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | ++ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); ++ } ++ } ++} ++ ++static void ++emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ while (count) { ++ unsigned push = MIN2(count, ctx->packet_vertex_limit); ++ unsigned size = ctx->vertex_words * push; ++ ++ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); ++ ++ ctx->translate->run(ctx->translate, start, push, ctx->instance_id, ++ ctx->chan->cur); ++ ctx->chan->cur += size; ++ count -= push; ++ start += push; ++ } ++} ++ ++ ++#define NVC0_PRIM_GL_CASE(n) \ ++ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n ++ ++static INLINE unsigned ++nvc0_prim_gl(unsigned prim) ++{ ++ switch (prim) { ++ NVC0_PRIM_GL_CASE(POINTS); ++ NVC0_PRIM_GL_CASE(LINES); ++ NVC0_PRIM_GL_CASE(LINE_LOOP); ++ NVC0_PRIM_GL_CASE(LINE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLES); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLE_FAN); ++ NVC0_PRIM_GL_CASE(QUADS); ++ NVC0_PRIM_GL_CASE(QUAD_STRIP); ++ NVC0_PRIM_GL_CASE(POLYGON); ++ NVC0_PRIM_GL_CASE(LINES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); ++ /* ++ NVC0_PRIM_GL_CASE(PATCHES); 
*/ ++ default: ++ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; ++ break; ++ } ++} ++ ++void ++nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) ++{ ++ struct push_context ctx; ++ unsigned i, index_size; ++ unsigned inst = info->instance_count; ++ ++ ctx.chan = nvc0->screen->base.channel; ++ ctx.translate = nvc0->vertex->translate; ++ ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; ++ ctx.vertex_words = nvc0->vertex->vtx_size; ++ ++ for (i = 0; i < nvc0->num_vtxbufs; ++i) { ++ uint8_t *data; ++ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; ++ struct nvc0_resource *res = nvc0_resource(vb->buffer); ++ ++ data = nvc0_resource_map_offset(nvc0, res, ++ vb->buffer_offset, NOUVEAU_BO_RD); ++ if (info->indexed) ++ data += info->index_bias * vb->stride; ++ ++ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); ++ } ++ ++ if (info->indexed) { ++ ctx.idxbuf = nvc0_resource_map_offset(nvc0, ++ nvc0_resource(nvc0->idxbuf.buffer), ++ nvc0->idxbuf.offset, NOUVEAU_BO_RD); ++ if (!ctx.idxbuf) ++ return; ++ index_size = nvc0->idxbuf.index_size; ++ ctx.primitive_restart = info->primitive_restart; ++ ctx.restart_index = info->restart_index; ++ } else { ++ ctx.idxbuf = NULL; ++ index_size = 0; ++ ctx.primitive_restart = FALSE; ++ ctx.restart_index = 0; ++ } ++ ++ ctx.instance_id = info->start_instance; ++ ctx.prim = nvc0_prim_gl(info->mode); ++ ++ while (inst--) { ++ BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); ++ OUT_RING (ctx.chan, ctx.prim); ++ switch (index_size) { ++ case 0: ++ emit_vertices_seq(&ctx, info->start, info->count); ++ break; ++ case 1: ++ emit_vertices_i08(&ctx, info->start, info->count); ++ break; ++ case 2: ++ emit_vertices_i16(&ctx, info->start, info->count); ++ break; ++ case 4: ++ emit_vertices_i32(&ctx, info->start, info->count); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0); ++ ++ ctx.instance_id++; ++ ctx.prim |= 
NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ } ++ ++ if (info->indexed) ++ nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer)); ++ ++ for (i = 0; i < nvc0->num_vtxbufs; ++i) ++ nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer)); ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c +new file mode 100644 +index 0000000..6f51600 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_push2.c +@@ -0,0 +1,333 @@ ++ ++#if 0 /* not used, kept for now to compare with util/translate */ ++ ++#include "pipe/p_context.h" ++#include "pipe/p_state.h" ++#include "util/u_inlines.h" ++#include "util/u_format.h" ++#include "translate/translate.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++ ++#include "nvc0_3d.xml.h" ++ ++struct push_context { ++ struct nvc0_context *nvc0; ++ ++ uint vertex_size; ++ ++ void *idxbuf; ++ uint idxsize; ++ ++ float edgeflag; ++ int edgeflag_input; ++ ++ struct { ++ void *map; ++ void (*push)(struct nouveau_channel *, void *); ++ uint32_t stride; ++ uint32_t divisor; ++ uint32_t step; ++ } attr[32]; ++ int num_attrs; ++}; ++ ++static void ++emit_b32_1(struct nouveau_channel *chan, void *data) ++{ ++ uint32_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++} ++ ++static void ++emit_b32_2(struct nouveau_channel *chan, void *data) ++{ ++ uint32_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++ OUT_RING(chan, v[1]); ++} ++ ++static void ++emit_b32_3(struct nouveau_channel *chan, void *data) ++{ ++ uint32_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++ OUT_RING(chan, v[1]); ++ OUT_RING(chan, v[2]); ++} ++ ++static void ++emit_b32_4(struct nouveau_channel *chan, void *data) ++{ ++ uint32_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++ OUT_RING(chan, v[1]); ++ OUT_RING(chan, v[2]); ++ OUT_RING(chan, v[3]); ++} ++ ++static void ++emit_b16_1(struct nouveau_channel *chan, void *data) ++{ ++ uint16_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++} ++ ++static void ++emit_b16_3(struct nouveau_channel *chan, void *data) ++{ ++ 
uint16_t *v = data; ++ ++ OUT_RING(chan, (v[1] << 16) | v[0]); ++ OUT_RING(chan, v[2]); ++} ++ ++static void ++emit_b08_1(struct nouveau_channel *chan, void *data) ++{ ++ uint8_t *v = data; ++ ++ OUT_RING(chan, v[0]); ++} ++ ++static void ++emit_b08_3(struct nouveau_channel *chan, void *data) ++{ ++ uint8_t *v = data; ++ ++ OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); ++} ++ ++static void ++emit_b64_1(struct nouveau_channel *chan, void *data) ++{ ++ double *v = data; ++ ++ OUT_RINGf(chan, v[0]); ++} ++ ++static void ++emit_b64_2(struct nouveau_channel *chan, void *data) ++{ ++ double *v = data; ++ ++ OUT_RINGf(chan, v[0]); ++ OUT_RINGf(chan, v[1]); ++} ++ ++static void ++emit_b64_3(struct nouveau_channel *chan, void *data) ++{ ++ double *v = data; ++ ++ OUT_RINGf(chan, v[0]); ++ OUT_RINGf(chan, v[1]); ++ OUT_RINGf(chan, v[2]); ++} ++ ++static void ++emit_b64_4(struct nouveau_channel *chan, void *data) ++{ ++ double *v = data; ++ ++ OUT_RINGf(chan, v[0]); ++ OUT_RINGf(chan, v[1]); ++ OUT_RINGf(chan, v[2]); ++ OUT_RINGf(chan, v[3]); ++} ++ ++static INLINE void ++emit_vertex(struct push_context *ctx, unsigned n) ++{ ++ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; ++ int i; ++ ++ if (ctx->edgeflag_input < 32) { ++ /* TODO */ ++ } ++ ++ BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size); ++ for (i = 0; i < ctx->num_attrs; ++i) ++ ctx->attr[i].push(chan, ++ (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride); ++} ++ ++static void ++emit_edgeflag(struct push_context *ctx, boolean enabled) ++{ ++ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel; ++ ++ IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled); ++} ++ ++static void ++emit_elt08(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ uint8_t *idxbuf = ctx->idxbuf; ++ ++ while (count--) ++ emit_vertex(ctx, idxbuf[start++]); ++} ++ ++static void ++emit_elt16(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ uint16_t *idxbuf = ctx->idxbuf; ++ 
++ while (count--) ++ emit_vertex(ctx, idxbuf[start++]); ++} ++ ++static void ++emit_elt32(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ uint32_t *idxbuf = ctx->idxbuf; ++ ++ while (count--) ++ emit_vertex(ctx, idxbuf[start++]); ++} ++ ++static void ++emit_seq(struct push_context *ctx, unsigned start, unsigned count) ++{ ++ while (count--) ++ emit_vertex(ctx, start++); ++} ++ ++#define NVC0_PRIM_GL_CASE(n) \ ++ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n ++ ++static INLINE unsigned ++nvc0_prim_gl(unsigned prim) ++{ ++ switch (prim) { ++ NVC0_PRIM_GL_CASE(POINTS); ++ NVC0_PRIM_GL_CASE(LINES); ++ NVC0_PRIM_GL_CASE(LINE_LOOP); ++ NVC0_PRIM_GL_CASE(LINE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLES); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLE_FAN); ++ NVC0_PRIM_GL_CASE(QUADS); ++ NVC0_PRIM_GL_CASE(QUAD_STRIP); ++ NVC0_PRIM_GL_CASE(POLYGON); ++ NVC0_PRIM_GL_CASE(LINES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); ++ /* ++ NVC0_PRIM_GL_CASE(PATCHES); */ ++ default: ++ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; ++ break; ++ } ++} ++ ++void ++nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info) ++{ ++ struct push_context ctx; ++ unsigned i, n; ++ unsigned inst = info->instance_count; ++ unsigned prim = nvc0_prim_gl(info->mode); ++ ++ ctx.nvc0 = nvc0; ++ ctx.vertex_size = nvc0->vertex->vtx_size; ++ ctx.idxbuf = NULL; ++ ctx.num_attrs = 0; ++ ctx.edgeflag = 0.5f; ++ ctx.edgeflag_input = 32; ++ ++ for (i = 0; i < nvc0->vertex->num_elements; ++i) { ++ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; ++ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; ++ struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo; ++ unsigned nr_components; ++ ++ if (!(nvc0->vbo_fifo & (1 << i))) ++ continue; ++ n = ctx.num_attrs++; ++ ++ if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) 
++ return; ++ ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset; ++ ++ nouveau_bo_unmap(bo); ++ ++ ctx.attr[n].stride = vb->stride; ++ ctx.attr[n].divisor = ve->instance_divisor; ++ ++ nr_components = util_format_get_nr_components(ve->src_format); ++ switch (util_format_get_component_bits(ve->src_format, ++ UTIL_FORMAT_COLORSPACE_RGB, 0)) { ++ case 8: ++ switch (nr_components) { ++ case 1: ctx.attr[n].push = emit_b08_1; break; ++ case 2: ctx.attr[n].push = emit_b16_1; break; ++ case 3: ctx.attr[n].push = emit_b08_3; break; ++ case 4: ctx.attr[n].push = emit_b32_1; break; ++ } ++ break; ++ case 16: ++ switch (nr_components) { ++ case 1: ctx.attr[n].push = emit_b16_1; break; ++ case 2: ctx.attr[n].push = emit_b32_1; break; ++ case 3: ctx.attr[n].push = emit_b16_3; break; ++ case 4: ctx.attr[n].push = emit_b32_2; break; ++ } ++ break; ++ case 32: ++ switch (nr_components) { ++ case 1: ctx.attr[n].push = emit_b32_1; break; ++ case 2: ctx.attr[n].push = emit_b32_2; break; ++ case 3: ctx.attr[n].push = emit_b32_3; break; ++ case 4: ctx.attr[n].push = emit_b32_4; break; ++ } ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ } ++ ++ if (info->indexed) { ++ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); ++ if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) ++ return; ++ ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset; ++ nouveau_bo_unmap(res->bo); ++ ctx.idxsize = nvc0->idxbuf.index_size; ++ } else { ++ ctx.idxsize = 0; ++ } ++ ++ while (inst--) { ++ BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1); ++ OUT_RING (nvc0->screen->base.channel, prim); ++ switch (ctx.idxsize) { ++ case 0: ++ emit_seq(&ctx, info->start, info->count); ++ break; ++ case 1: ++ emit_elt08(&ctx, info->start, info->count); ++ break; ++ case 2: ++ emit_elt16(&ctx, info->start, info->count); ++ break; ++ case 4: ++ emit_elt32(&ctx, info->start, info->count); ++ break; ++ } ++ IMMED_RING(nvc0->screen->base.channel, 
RING_3D(VERTEX_END_GL), 0); ++ ++ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ } ++} ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c +new file mode 100644 +index 0000000..cc83fbe +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_query.c +@@ -0,0 +1,337 @@ ++/* ++ * Copyright 2011 Nouveau Project ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * Authors: Christoph Bumiller ++ */ ++ ++#include "nvc0_context.h" ++#include "nouveau/nv_object.xml.h" ++ ++/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts ++ * (since we use only a single GPU channel per screen) will not work properly. ++ * ++ * The first is not that big of an issue because OpenGL does not allow nested ++ * queries anyway. 
++ */ ++ ++struct nvc0_query { ++ uint32_t *data; ++ uint32_t type; ++ uint32_t sequence; ++ struct nouveau_bo *bo; ++ uint32_t base; ++ uint32_t offset; /* base + i * 16 */ ++ boolean ready; ++ boolean is64bit; ++ struct nvc0_mm_allocation *mm; ++}; ++ ++#define NVC0_QUERY_ALLOC_SPACE 128 ++ ++static INLINE struct nvc0_query * ++nvc0_query(struct pipe_query *pipe) ++{ ++ return (struct nvc0_query *)pipe; ++} ++ ++static boolean ++nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) ++{ ++ struct nvc0_screen *screen = nvc0->screen; ++ int ret; ++ ++ if (q->bo) { ++ nouveau_bo_ref(NULL, &q->bo); ++ if (q->mm) { ++ if (q->ready) ++ nvc0_mm_free(q->mm); ++ else ++ nvc0_fence_sched_release(screen->fence.current, q->mm); ++ } ++ } ++ if (size) { ++ q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base); ++ if (!q->bo) ++ return FALSE; ++ q->offset = q->base; ++ ++ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD | ++ NOUVEAU_BO_NOSYNC); ++ if (ret) { ++ nvc0_query_allocate(nvc0, q, 0); ++ return FALSE; ++ } ++ q->data = q->bo->map; ++ nouveau_bo_unmap(q->bo); ++ } ++ return TRUE; ++} ++ ++static void ++nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) ++{ ++ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); ++ FREE(nvc0_query(pq)); ++} ++ ++static struct pipe_query * ++nvc0_query_create(struct pipe_context *pipe, unsigned type) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nvc0_query *q; ++ ++ q = CALLOC_STRUCT(nvc0_query); ++ if (!q) ++ return NULL; ++ ++ if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) { ++ FREE(q); ++ return NULL; ++ } ++ ++ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || ++ type == PIPE_QUERY_PRIMITIVES_EMITTED || ++ type == PIPE_QUERY_SO_STATISTICS); ++ q->type = type; ++ ++ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { ++ q->offset -= 16; ++ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! 
*/ ++ } ++ ++ return (struct pipe_query *)q; ++} ++ ++static void ++nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q, ++ unsigned offset, uint32_t get) ++{ ++ offset += q->offset; ++ ++ MARK_RING (chan, 5, 2); ++ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); ++ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); ++ OUT_RING (chan, q->sequence); ++ OUT_RING (chan, get); ++} ++ ++static void ++nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_query *q = nvc0_query(pq); ++ ++ /* For occlusion queries we have to change the storage, because a previous ++ * query might set the initial render conition to FALSE even *after* we re- ++ * initialized it to TRUE. ++ */ ++ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { ++ q->offset += 16; ++ q->data += 16 / sizeof(*q->data); ++ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) ++ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); ++ ++ /* XXX: can we do this with the GPU, and sync with respect to a previous ++ * query ? ++ */ ++ q->data[1] = 1; /* initial render condition = TRUE */ ++ } ++ if (!q->is64bit) ++ q->data[0] = q->sequence++; /* the previously used one */ ++ ++ switch (q->type) { ++ case PIPE_QUERY_OCCLUSION_COUNTER: ++ IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT); ++ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); ++ break; ++ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? 
*/ ++ IMMED_RING(chan, RING_3D(COUNTER_RESET), ++ NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); ++ break; ++ case PIPE_QUERY_PRIMITIVES_EMITTED: ++ IMMED_RING(chan, RING_3D(COUNTER_RESET), ++ NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); ++ break; ++ case PIPE_QUERY_SO_STATISTICS: ++ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2); ++ OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); ++ OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); ++ break; ++ case PIPE_QUERY_TIMESTAMP_DISJOINT: ++ case PIPE_QUERY_TIME_ELAPSED: ++ nvc0_query_get(chan, q, 0x10, 0x00005002); ++ break; ++ default: ++ break; ++ } ++ q->ready = FALSE; ++} ++ ++static void ++nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_query *q = nvc0_query(pq); ++ ++ const int index = 0; /* for multiple vertex streams */ ++ ++ switch (q->type) { ++ case PIPE_QUERY_OCCLUSION_COUNTER: ++ nvc0_query_get(chan, q, 0, 0x0100f002); ++ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); ++ OUT_RING (chan, 0); ++ break; ++ case PIPE_QUERY_PRIMITIVES_GENERATED: ++ nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5)); ++ break; ++ case PIPE_QUERY_PRIMITIVES_EMITTED: ++ nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5)); ++ break; ++ case PIPE_QUERY_SO_STATISTICS: ++ nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5)); ++ nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5)); ++ break; ++ case PIPE_QUERY_TIMESTAMP_DISJOINT: ++ case PIPE_QUERY_TIME_ELAPSED: ++ nvc0_query_get(chan, q, 0, 0x00005002); ++ break; ++ case PIPE_QUERY_GPU_FINISHED: ++ nvc0_query_get(chan, q, 0, 0x1000f010); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++} ++ ++static INLINE boolean ++nvc0_query_ready(struct nvc0_query *q) ++{ ++ return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); ++} ++ ++static INLINE boolean ++nvc0_query_wait(struct nvc0_query *q) ++{ ++ int 
ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); ++ if (ret) ++ return FALSE; ++ nouveau_bo_unmap(q->bo); ++ return TRUE; ++} ++ ++static boolean ++nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, ++ boolean wait, void *result) ++{ ++ struct nvc0_query *q = nvc0_query(pq); ++ uint64_t *res64 = result; ++ uint32_t *res32 = result; ++ boolean *res8 = result; ++ uint64_t *data64 = (uint64_t *)q->data; ++ ++ if (q->type == PIPE_QUERY_GPU_FINISHED) { ++ res8[0] = nvc0_query_ready(q); ++ return TRUE; ++ } ++ ++ if (!q->ready) /* update ? */ ++ q->ready = nvc0_query_ready(q); ++ if (!q->ready) { ++ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; ++ if (!wait) { ++ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */ ++ FIRE_RING(chan); ++ return FALSE; ++ } ++ if (!nvc0_query_wait(q)) ++ return FALSE; ++ } ++ q->ready = TRUE; ++ ++ switch (q->type) { ++ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ ++ res32[0] = q->data[1]; ++ break; ++ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ ++ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ ++ res64[0] = data64[0]; ++ break; ++ case PIPE_QUERY_SO_STATISTICS: ++ res64[0] = data64[0]; ++ res64[1] = data64[1]; ++ break; ++ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ ++ res64[0] = 1000000000; ++ res8[8] = (data64[0] == data64[2]) ? 
FALSE : TRUE; ++ break; ++ case PIPE_QUERY_TIME_ELAPSED: ++ res64[0] = data64[1] - data64[3]; ++ break; ++ default: ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++static void ++nvc0_render_condition(struct pipe_context *pipe, ++ struct pipe_query *pq, uint mode) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_query *q; ++ ++ if (!pq) { ++ IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); ++ return; ++ } ++ q = nvc0_query(pq); ++ ++ if (mode == PIPE_RENDER_COND_WAIT || ++ mode == PIPE_RENDER_COND_BY_REGION_WAIT) { ++ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); ++ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); ++ OUT_RING (chan, q->sequence); ++ OUT_RING (chan, 0x00001001); ++ } ++ ++ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); ++ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); ++ OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO); ++} ++ ++void ++nvc0_init_query_functions(struct nvc0_context *nvc0) ++{ ++ nvc0->pipe.create_query = nvc0_query_create; ++ nvc0->pipe.destroy_query = nvc0_query_destroy; ++ nvc0->pipe.begin_query = nvc0_query_begin; ++ nvc0->pipe.end_query = nvc0_query_end; ++ nvc0->pipe.get_query_result = nvc0_query_result; ++ nvc0->pipe.render_condition = nvc0_render_condition; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c +new file mode 100644 +index 0000000..7e42ced +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_resource.c +@@ -0,0 +1,71 @@ ++ ++#include "pipe/p_context.h" ++#include "nvc0_resource.h" ++#include "nouveau/nouveau_screen.h" ++ ++static unsigned ++nvc0_resource_is_referenced(struct pipe_context *pipe, ++ struct pipe_resource *resource, ++ unsigned face, int layer) ++{ ++ 
struct nvc0_resource *res = nvc0_resource(resource); ++ unsigned flags = 0; ++ ++#ifdef NOUVEAU_USERSPACE_MM ++ flags = res->status; ++#else ++ unsigned bo_flags = nouveau_bo_pending(res->bo); ++ if (bo_flags & NOUVEAU_BO_RD) ++ flags = PIPE_REFERENCED_FOR_READ; ++ if (bo_flags & NOUVEAU_BO_WR) ++ flags |= PIPE_REFERENCED_FOR_WRITE; ++#endif ++ return flags; ++} ++ ++static struct pipe_resource * ++nvc0_resource_create(struct pipe_screen *screen, ++ const struct pipe_resource *templ) ++{ ++ switch (templ->target) { ++ case PIPE_BUFFER: ++ return nvc0_buffer_create(screen, templ); ++ default: ++ return nvc0_miptree_create(screen, templ); ++ } ++} ++ ++static struct pipe_resource * ++nvc0_resource_from_handle(struct pipe_screen * screen, ++ const struct pipe_resource *templ, ++ struct winsys_handle *whandle) ++{ ++ if (templ->target == PIPE_BUFFER) ++ return NULL; ++ else ++ return nvc0_miptree_from_handle(screen, templ, whandle); ++} ++ ++void ++nvc0_init_resource_functions(struct pipe_context *pcontext) ++{ ++ pcontext->get_transfer = u_get_transfer_vtbl; ++ pcontext->transfer_map = u_transfer_map_vtbl; ++ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; ++ pcontext->transfer_unmap = u_transfer_unmap_vtbl; ++ pcontext->transfer_destroy = u_transfer_destroy_vtbl; ++ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; ++ pcontext->is_resource_referenced = nvc0_resource_is_referenced; ++ pcontext->create_surface = nvc0_miptree_surface_new; ++ pcontext->surface_destroy = nvc0_miptree_surface_del; ++} ++ ++void ++nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) ++{ ++ pscreen->resource_create = nvc0_resource_create; ++ pscreen->resource_from_handle = nvc0_resource_from_handle; ++ pscreen->resource_get_handle = u_resource_get_handle_vtbl; ++ pscreen->resource_destroy = u_resource_destroy_vtbl; ++ pscreen->user_buffer_create = nvc0_user_buffer_create; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h 
b/src/gallium/drivers/nvc0/nvc0_resource.h +new file mode 100644 +index 0000000..17e7964 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_resource.h +@@ -0,0 +1,201 @@ ++ ++#ifndef __NVC0_RESOURCE_H__ ++#define __NVC0_RESOURCE_H__ ++ ++#include "util/u_transfer.h" ++#include "util/u_double_list.h" ++#define NOUVEAU_NVC0 ++#include "nouveau/nouveau_winsys.h" ++#undef NOUVEAU_NVC0 ++ ++#include "nvc0_fence.h" ++ ++struct pipe_resource; ++struct nouveau_bo; ++struct nvc0_context; ++ ++#define NVC0_BUFFER_SCORE_MIN -25000 ++#define NVC0_BUFFER_SCORE_MAX 25000 ++#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000 ++ ++/* DIRTY: buffer was (or will be after the next flush) written to by GPU and ++ * resource->data has not been updated to reflect modified VRAM contents ++ * ++ * USER_MEMORY: resource->data is a pointer to client memory and may change ++ * between GL calls ++ */ ++#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) ++#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) ++ ++/* Resources, if mapped into the GPU's address space, are guaranteed to ++ * have constant virtual addresses. ++ * The address of a resource will lie within the nouveau_bo referenced, ++ * and this bo should be added to the memory manager's validation list. 
++ */ ++struct nvc0_resource { ++ struct pipe_resource base; ++ const struct u_resource_vtbl *vtbl; ++ ++ uint8_t *data; ++ struct nouveau_bo *bo; ++ uint32_t offset; ++ ++ uint8_t status; ++ uint8_t domain; ++ ++ int16_t score; /* low if mapped very often, if high can move to VRAM */ ++ ++ struct nvc0_fence *fence; ++ struct nvc0_fence *fence_wr; ++ ++ struct nvc0_mm_allocation *mm; ++}; ++ ++boolean ++nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, ++ unsigned start, unsigned size); ++ ++boolean ++nvc0_buffer_migrate(struct nvc0_context *, ++ struct nvc0_resource *, unsigned domain); ++ ++static INLINE void ++nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res, ++ int16_t score) ++{ ++ if (score < 0) { ++ if (res->score > NVC0_BUFFER_SCORE_MIN) ++ res->score += score; ++ } else ++ if (score > 0){ ++ if (res->score < NVC0_BUFFER_SCORE_MAX) ++ res->score += score; ++ if (res->domain == NOUVEAU_BO_GART && ++ res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD) ++ nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM); ++ } ++} ++ ++/* XXX: wait for fence (atm only using this for vertex push) */ ++static INLINE void * ++nvc0_resource_map_offset(struct nvc0_context *nvc0, ++ struct nvc0_resource *res, uint32_t offset, ++ uint32_t flags) ++{ ++ void *map; ++ ++ nvc0_buffer_adjust_score(nvc0, res, -250); ++ ++ if ((res->domain == NOUVEAU_BO_VRAM) && ++ (res->status & NVC0_BUFFER_STATUS_DIRTY)) ++ nvc0_buffer_download(nvc0, res, 0, res->base.width0); ++ ++ if ((res->domain != NOUVEAU_BO_GART) || ++ (res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) ++ return res->data + offset; ++ ++ if (res->mm) ++ flags |= NOUVEAU_BO_NOSYNC; ++ ++ if (nouveau_bo_map_range(res->bo, res->offset + offset, ++ res->base.width0, flags)) ++ return NULL; ++ ++ map = res->bo->map; ++ nouveau_bo_unmap(res->bo); ++ return map; ++} ++ ++static INLINE void ++nvc0_resource_unmap(struct nvc0_resource *res) ++{ ++ /* no-op */ ++} ++ ++#define NVC0_TILE_DIM_SHIFT(m, d) 
(((m) >> (d * 4)) & 0xf) ++ ++#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) ++#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) ++#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) ++ ++#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \ ++ NVC0_TILE_DIM_SHIFT(m, 0)) << \ ++ NVC0_TILE_DIM_SHIFT(m, 1)) ++ ++#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2)) ++ ++struct nvc0_miptree_level { ++ uint32_t offset; ++ uint32_t pitch; ++ uint32_t tile_mode; ++}; ++ ++#define NVC0_MAX_TEXTURE_LEVELS 16 ++ ++struct nvc0_miptree { ++ struct nvc0_resource base; ++ struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; ++ uint32_t total_size; ++ uint32_t layer_stride; ++ boolean layout_3d; /* TRUE if layer count varies with mip level */ ++}; ++ ++static INLINE struct nvc0_miptree * ++nvc0_miptree(struct pipe_resource *pt) ++{ ++ return (struct nvc0_miptree *)pt; ++} ++ ++static INLINE struct nvc0_resource * ++nvc0_resource(struct pipe_resource *resource) ++{ ++ return (struct nvc0_resource *)resource; ++} ++ ++/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? 
*/ ++static INLINE boolean ++nvc0_resource_mapped_by_gpu(struct pipe_resource *resource) ++{ ++ return nvc0_resource(resource)->domain != 0; ++} ++ ++void ++nvc0_init_resource_functions(struct pipe_context *pcontext); ++ ++void ++nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); ++ ++/* Internal functions: ++ */ ++struct pipe_resource * ++nvc0_miptree_create(struct pipe_screen *pscreen, ++ const struct pipe_resource *tmp); ++ ++struct pipe_resource * ++nvc0_miptree_from_handle(struct pipe_screen *pscreen, ++ const struct pipe_resource *template, ++ struct winsys_handle *whandle); ++ ++struct pipe_resource * ++nvc0_buffer_create(struct pipe_screen *pscreen, ++ const struct pipe_resource *templ); ++ ++struct pipe_resource * ++nvc0_user_buffer_create(struct pipe_screen *screen, ++ void *ptr, ++ unsigned bytes, ++ unsigned usage); ++ ++ ++struct pipe_surface * ++nvc0_miptree_surface_new(struct pipe_context *, ++ struct pipe_resource *, ++ const struct pipe_surface *templ); ++ ++void ++nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); ++ ++boolean ++nvc0_user_buffer_upload(struct nvc0_resource *, unsigned base, unsigned size); ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c +new file mode 100644 +index 0000000..f608b32 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_screen.c +@@ -0,0 +1,670 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be 
included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "util/u_format_s3tc.h" ++#include "pipe/p_screen.h" ++ ++#include "nvc0_fence.h" ++#include "nvc0_context.h" ++#include "nvc0_screen.h" ++ ++#include "nouveau/nv_object.xml.h" ++#include "nvc0_graph_macros.h" ++ ++static boolean ++nvc0_screen_is_format_supported(struct pipe_screen *pscreen, ++ enum pipe_format format, ++ enum pipe_texture_target target, ++ unsigned sample_count, ++ unsigned bindings, unsigned geom_flags) ++{ ++ if (sample_count > 1) ++ return FALSE; ++ ++ if (!util_format_s3tc_enabled) { ++ switch (format) { ++ case PIPE_FORMAT_DXT1_RGB: ++ case PIPE_FORMAT_DXT1_RGBA: ++ case PIPE_FORMAT_DXT3_RGBA: ++ case PIPE_FORMAT_DXT5_RGBA: ++ return FALSE; ++ default: ++ break; ++ } ++ } ++ ++ /* transfers & shared are always supported */ ++ bindings &= ~(PIPE_BIND_TRANSFER_READ | ++ PIPE_BIND_TRANSFER_WRITE | ++ PIPE_BIND_SHARED); ++ ++ return (nvc0_format_table[format].usage & bindings) == bindings; ++} ++ ++static int ++nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) ++{ ++ switch (param) { ++ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: ++ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: ++ return 32; ++ case PIPE_CAP_MAX_COMBINED_SAMPLERS: ++ return 64; ++ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: ++ return 13; ++ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: ++ return 10; ++ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: ++ return 13; ++ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: ++ case 
PIPE_CAP_TEXTURE_MIRROR_REPEAT: ++ case PIPE_CAP_TEXTURE_SWIZZLE: ++ case PIPE_CAP_TEXTURE_SHADOW_MAP: ++ case PIPE_CAP_NPOT_TEXTURES: ++ case PIPE_CAP_ANISOTROPIC_FILTER: ++ return 1; ++ case PIPE_CAP_TWO_SIDED_STENCIL: ++ case PIPE_CAP_DEPTH_CLAMP: ++ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: ++ case PIPE_CAP_POINT_SPRITE: ++ return 1; ++ case PIPE_CAP_GLSL: ++ case PIPE_CAP_SM3: ++ return 1; ++ case PIPE_CAP_MAX_RENDER_TARGETS: ++ return 8; ++ case PIPE_CAP_TIMER_QUERY: ++ case PIPE_CAP_OCCLUSION_QUERY: ++ return 1; ++ case PIPE_CAP_STREAM_OUTPUT: ++ return 0; ++ case PIPE_CAP_BLEND_EQUATION_SEPARATE: ++ case PIPE_CAP_INDEP_BLEND_ENABLE: ++ case PIPE_CAP_INDEP_BLEND_FUNC: ++ return 1; ++ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: ++ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: ++ return 1; ++ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: ++ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: ++ return 0; ++ case PIPE_CAP_SHADER_STENCIL_EXPORT: ++ return 0; ++ case PIPE_CAP_PRIMITIVE_RESTART: ++ case PIPE_CAP_INSTANCED_DRAWING: ++ return 1; ++ default: ++ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); ++ return 0; ++ } ++} ++ ++static int ++nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, ++ enum pipe_shader_cap param) ++{ ++ switch (shader) { ++ case PIPE_SHADER_VERTEX: ++ /* ++ case PIPE_SHADER_TESSELLATION_CONTROL: ++ case PIPE_SHADER_TESSELLATION_EVALUATION: ++ */ ++ case PIPE_SHADER_GEOMETRY: ++ case PIPE_SHADER_FRAGMENT: ++ break; ++ default: ++ return 0; ++ } ++ ++ switch (param) { ++ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: ++ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: ++ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: ++ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: ++ return 16384; ++ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: ++ return 4; ++ case PIPE_SHADER_CAP_MAX_INPUTS: ++ if (shader == PIPE_SHADER_VERTEX) ++ return 32; ++ return 0x300 / 16; ++ case PIPE_SHADER_CAP_MAX_CONSTS: ++ return 65536 / 16; ++ case 
PIPE_SHADER_CAP_MAX_CONST_BUFFERS: ++ return 14; ++ case PIPE_SHADER_CAP_MAX_ADDRS: ++ return 1; ++ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: ++ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: ++ return shader != PIPE_SHADER_FRAGMENT; ++ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: ++ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: ++ return 1; ++ case PIPE_SHADER_CAP_MAX_PREDS: ++ return 0; ++ case PIPE_SHADER_CAP_MAX_TEMPS: ++ return NVC0_CAP_MAX_PROGRAM_TEMPS; ++ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: ++ return 1; ++ case PIPE_SHADER_CAP_SUBROUTINES: ++ return 0; /* please inline, or provide function declarations */ ++ default: ++ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); ++ return 0; ++ } ++} ++ ++static float ++nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) ++{ ++ switch (param) { ++ case PIPE_CAP_MAX_LINE_WIDTH: ++ case PIPE_CAP_MAX_LINE_WIDTH_AA: ++ return 10.0f; ++ case PIPE_CAP_MAX_POINT_WIDTH: ++ case PIPE_CAP_MAX_POINT_WIDTH_AA: ++ return 64.0f; ++ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: ++ return 16.0f; ++ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: ++ return 4.0f; ++ default: ++ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); ++ return 0.0f; ++ } ++} ++ ++static void ++nvc0_screen_destroy(struct pipe_screen *pscreen) ++{ ++ struct nvc0_screen *screen = nvc0_screen(pscreen); ++ ++ nvc0_fence_wait(screen->fence.current); ++ nvc0_fence_reference(&screen->fence.current, NULL); ++ ++ nouveau_bo_ref(NULL, &screen->text); ++ nouveau_bo_ref(NULL, &screen->tls); ++ nouveau_bo_ref(NULL, &screen->txc); ++ nouveau_bo_ref(NULL, &screen->fence.bo); ++ nouveau_bo_ref(NULL, &screen->mp_stack_bo); ++ ++ nouveau_resource_destroy(&screen->text_heap); ++ ++ if (screen->tic.entries) ++ FREE(screen->tic.entries); ++ ++ nvc0_mm_destroy(screen->mm_GART); ++ nvc0_mm_destroy(screen->mm_VRAM); ++ nvc0_mm_destroy(screen->mm_VRAM_fe0); ++ ++ nouveau_grobj_free(&screen->fermi); ++ nouveau_grobj_free(&screen->eng2d); ++ nouveau_grobj_free(&screen->m2mf); ++ ++ 
nouveau_screen_fini(&screen->base); ++ ++ FREE(screen); ++} ++ ++static int ++nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, ++ unsigned size, const uint32_t *data) ++{ ++ struct nouveau_channel *chan = screen->base.channel; ++ ++ size /= 4; ++ ++ BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2); ++ OUT_RING (chan, (m - 0x3800) / 8); ++ OUT_RING (chan, pos); ++ BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); ++ OUT_RING (chan, pos); ++ OUT_RINGp (chan, data, size); ++ ++ return pos + size; ++} ++ ++static void ++nvc0_screen_fence_reference(struct pipe_screen *pscreen, ++ struct pipe_fence_handle **ptr, ++ struct pipe_fence_handle *fence) ++{ ++ nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence)); ++} ++ ++static int ++nvc0_screen_fence_signalled(struct pipe_screen *pscreen, ++ struct pipe_fence_handle *fence, ++ unsigned flags) ++{ ++ return !(nvc0_fence_signalled(nvc0_fence(fence))); ++} ++ ++static int ++nvc0_screen_fence_finish(struct pipe_screen *pscreen, ++ struct pipe_fence_handle *fence, ++ unsigned flags) ++{ ++ return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE; ++} ++ ++static void ++nvc0_magic_3d_init(struct nouveau_channel *chan) ++{ ++ BEGIN_RING(chan, RING_3D_(0x10cc), 1); ++ OUT_RING (chan, 0xff); ++ BEGIN_RING(chan, RING_3D_(0x10e0), 2); ++ OUT_RING(chan, 0xff); ++ OUT_RING(chan, 0xff); ++ BEGIN_RING(chan, RING_3D_(0x10ec), 2); ++ OUT_RING(chan, 0xff); ++ OUT_RING(chan, 0xff); ++ BEGIN_RING(chan, RING_3D_(0x074c), 1); ++ OUT_RING (chan, 0x3f); ++ ++ BEGIN_RING(chan, RING_3D_(0x10f8), 1); ++ OUT_RING (chan, 0x0101); ++ ++ BEGIN_RING(chan, RING_3D_(0x16a8), 1); ++ OUT_RING (chan, (3 << 16) | 3); ++ BEGIN_RING(chan, RING_3D_(0x1794), 1); ++ OUT_RING (chan, (2 << 16) | 2); ++ BEGIN_RING(chan, RING_3D_(0x0de8), 1); ++ OUT_RING (chan, 1); ++ ++#if 0 /* software method */ ++ BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */ ++ OUT_RING (chan, 0); ++#endif ++ ++ BEGIN_RING(chan, 
RING_3D_(0x12ac), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x0218), 1); ++ OUT_RING (chan, 0x10); ++ BEGIN_RING(chan, RING_3D_(0x10fc), 1); ++ OUT_RING (chan, 0x10); ++ BEGIN_RING(chan, RING_3D_(0x1290), 1); ++ OUT_RING (chan, 0x10); ++ BEGIN_RING(chan, RING_3D_(0x12d8), 2); ++ OUT_RING (chan, 0x10); ++ OUT_RING (chan, 0x10); ++ BEGIN_RING(chan, RING_3D_(0x06d4), 1); ++ OUT_RING (chan, 8); ++ BEGIN_RING(chan, RING_3D_(0x1140), 1); ++ OUT_RING (chan, 0x10); ++ BEGIN_RING(chan, RING_3D_(0x1610), 1); ++ OUT_RING (chan, 0xe); ++ ++ BEGIN_RING(chan, RING_3D_(0x164c), 1); ++ OUT_RING (chan, 1 << 12); ++ BEGIN_RING(chan, RING_3D_(0x151c), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x020c), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x030c), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x0300), 1); ++ OUT_RING (chan, 3); ++#if 0 /* software method */ ++ BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */ ++ OUT_RING (chan, 0); ++#endif ++ BEGIN_RING(chan, RING_3D_(0x02d0), 1); ++ OUT_RING (chan, 0x1f40); ++ BEGIN_RING(chan, RING_3D_(0x00fdc), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x19c0), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x075c), 1); ++ OUT_RING (chan, 3); ++ ++ BEGIN_RING(chan, RING_3D_(0x0fac), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x0f90), 1); ++ OUT_RING (chan, 0); ++} ++ ++#define FAIL_SCREEN_INIT(str, err) \ ++ do { \ ++ NOUVEAU_ERR(str, err); \ ++ nvc0_screen_destroy(pscreen); \ ++ return NULL; \ ++ } while(0) ++ ++struct pipe_screen * ++nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) ++{ ++ struct nvc0_screen *screen; ++ struct nouveau_channel *chan; ++ struct pipe_screen *pscreen; ++ int ret; ++ unsigned i; ++ ++ screen = CALLOC_STRUCT(nvc0_screen); ++ if (!screen) ++ return NULL; ++ pscreen = &screen->base.base; ++ ++ ret = nouveau_screen_init(&screen->base, dev); ++ if (ret) { ++ nvc0_screen_destroy(pscreen); ++ return NULL; 
++ } ++ chan = screen->base.channel; ++ ++ pscreen->winsys = ws; ++ pscreen->destroy = nvc0_screen_destroy; ++ pscreen->context_create = nvc0_create; ++ pscreen->is_format_supported = nvc0_screen_is_format_supported; ++ pscreen->get_param = nvc0_screen_get_param; ++ pscreen->get_shader_param = nvc0_screen_get_shader_param; ++ pscreen->get_paramf = nvc0_screen_get_paramf; ++ pscreen->fence_reference = nvc0_screen_fence_reference; ++ pscreen->fence_signalled = nvc0_screen_fence_signalled; ++ pscreen->fence_finish = nvc0_screen_fence_finish; ++ ++ nvc0_screen_init_resource_functions(pscreen); ++ ++ screen->base.vertex_buffer_flags = NOUVEAU_BO_GART; ++ screen->base.index_buffer_flags = 0; ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, ++ &screen->fence.bo); ++ if (ret) ++ goto fail; ++ nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR); ++ screen->fence.map = screen->fence.bo->map; ++ nouveau_bo_unmap(screen->fence.bo); ++ ++ for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, ++ &screen->scratch.bo[i]); ++ if (ret) ++ goto fail; ++ } ++ ++ ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf); ++ if (ret) ++ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); ++ ++ BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF); ++ BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); ++ OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, 0); ++ ++ ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d); ++ if (ret) ++ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); ++ ++ BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D); ++ BEGIN_RING(chan, RING_2D(OPERATION), 1); ++ OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY); ++ BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, 
RING_2D(COLOR_KEY_ENABLE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_2D_(0x0884), 1); ++ OUT_RING (chan, 0x3f); ++ BEGIN_RING(chan, RING_2D_(0x0888), 1); ++ OUT_RING (chan, 1); ++ ++ ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi); ++ if (ret) ++ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); ++ ++ BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D); ++ BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3); ++ OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, 0); ++ ++ BEGIN_RING(chan, RING_3D(COND_MODE), 1); ++ OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS); ++ ++ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); ++ OUT_RING (chan, 1); ++ ++ BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1); ++ OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X); ++ BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); ++ OUT_RING (chan, 0); ++ ++ nvc0_magic_3d_init(chan); ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text); ++ if (ret) ++ goto fail; ++ ++ nouveau_resource_init(&screen->text_heap, 0, 1 << 20); ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, ++ &screen->uniforms); ++ if (ret) ++ goto fail; ++ ++ /* auxiliary constants (6 user clip planes, base instance id) */ ++ BEGIN_RING(chan, RING_3D(CB_SIZE), 3); ++ OUT_RING (chan, 256); ++ OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ for (i = 0; i < 5; ++i) { ++ BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1); ++ OUT_RING (chan, (15 << 4) | 1); ++ } ++ ++ screen->tls_size = 4 * 4 * 32 * 128 * 4; ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, ++ screen->tls_size, &screen->tls); ++ if 
(ret) ++ goto fail; ++ ++ BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2); ++ OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4); ++ OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, screen->tls_size >> 32); ++ OUT_RING (chan, screen->tls_size); ++ BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); ++ OUT_RING (chan, 0); ++ ++ for (i = 0; i < 5; ++i) { ++ BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1); ++ OUT_RING (chan, 0x54); ++ } ++ BEGIN_RING(chan, RING_3D(LINKED_TSC), 1); ++ OUT_RING (chan, 0); ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, ++ &screen->mp_stack_bo); ++ if (ret) ++ goto fail; ++ ++ BEGIN_RING(chan, RING_3D_(0x17bc), 3); ++ OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, 1); ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc); ++ if (ret) ++ goto fail; ++ ++ BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3); ++ OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1); ++ ++ BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3); ++ OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1); ++ ++ BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ ++ OUT_RING (chan, 0x3f); ++ ++ BEGIN_RING(chan, 
RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); ++ OUT_RING (chan, 0); ++ ++ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); ++ OUT_RINGf (chan, 0.0f); ++ OUT_RINGf (chan, 1.0f); ++ ++ /* We use scissors instead of exact view volume clipping, ++ * so they're always enabled. ++ */ ++ BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 8192 << 16); ++ OUT_RING (chan, 8192 << 16); ++ ++ BEGIN_RING(chan, RING_3D_(0x0fac), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x3484), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x0dbc), 1); ++ OUT_RING (chan, 0x00010000); ++ BEGIN_RING(chan, RING_3D_(0x0dd8), 1); ++ OUT_RING (chan, 0xff800006); ++ BEGIN_RING(chan, RING_3D_(0x3488), 1); ++ OUT_RING (chan, 0); ++ ++#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); ++ ++ i = 0; ++ MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables); ++ MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); ++ MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select); ++ MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select); ++ MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); ++ MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); ++ MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc); ++ ++ BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(GP_SELECT), 1); ++ OUT_RING (chan, 0x40); ++ BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); ++ OUT_RING (chan, 0x30); ++ BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1); ++ OUT_RING (chan, 3); ++ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); ++ OUT_RING (chan, 0x20); ++ BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1); ++ OUT_RING (chan, 0x00); ++ ++ BEGIN_RING(chan, 
RING_3D(POINT_COORD_REPLACE), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1); ++ OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL); ++ ++ BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); ++ OUT_RING (chan, 0x11111111); ++ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); ++ OUT_RING (chan, 1); ++ ++ BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); ++ OUT_RING (chan, 0xab); ++ OUT_RING (chan, 0x00000000); ++ ++ FIRE_RING (chan); ++ ++ screen->tic.entries = CALLOC(4096, sizeof(void *)); ++ screen->tsc.entries = screen->tic.entries + 2048; ++ ++ screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, ++ 0x000); ++ screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); ++ screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); ++ ++ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); ++ ++ return pscreen; ++ ++fail: ++ nvc0_screen_destroy(pscreen); ++ return NULL; ++} ++ ++void ++nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) ++{ ++ struct nouveau_channel *chan = screen->base.channel; ++ ++ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; ++ ++ nouveau_bo_validate(chan, screen->text, flags); ++ nouveau_bo_validate(chan, screen->uniforms, flags); ++ nouveau_bo_validate(chan, screen->txc, flags); ++ nouveau_bo_validate(chan, screen->tls, flags); ++ nouveau_bo_validate(chan, screen->mp_stack_bo, flags); ++} ++ ++int ++nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) ++{ ++ int i = screen->tic.next; ++ ++ while (screen->tic.lock[i / 32] & (1 << (i % 32))) ++ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); ++ ++ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); ++ ++ if (screen->tic.entries[i]) ++ nvc0_tic_entry(screen->tic.entries[i])->id = -1; ++ ++ screen->tic.entries[i] = entry; ++ return i; ++} ++ ++int ++nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) ++{ ++ int i = screen->tsc.next; ++ ++ while (screen->tsc.lock[i / 32] & (1 << (i % 
32))) ++ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); ++ ++ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); ++ ++ if (screen->tsc.entries[i]) ++ nvc0_tsc_entry(screen->tsc.entries[i])->id = -1; ++ ++ screen->tsc.entries[i] = entry; ++ return i; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h +new file mode 100644 +index 0000000..1fac142 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_screen.h +@@ -0,0 +1,192 @@ ++#ifndef __NVC0_SCREEN_H__ ++#define __NVC0_SCREEN_H__ ++ ++#define NOUVEAU_NVC0 ++#include "nouveau/nouveau_screen.h" ++#undef NOUVEAU_NVC0 ++#include "nvc0_winsys.h" ++#include "nvc0_stateobj.h" ++ ++#define NVC0_TIC_MAX_ENTRIES 2048 ++#define NVC0_TSC_MAX_ENTRIES 2048 ++ ++struct nvc0_mman; ++struct nvc0_context; ++struct nvc0_fence; ++ ++#define NVC0_SCRATCH_SIZE (2 << 20) ++#define NVC0_SCRATCH_NR_BUFFERS 2 ++ ++struct nvc0_screen { ++ struct nouveau_screen base; ++ struct nouveau_winsys *nvws; ++ ++ struct nvc0_context *cur_ctx; ++ ++ struct nouveau_bo *text; ++ struct nouveau_bo *uniforms; ++ struct nouveau_bo *tls; ++ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ ++ struct nouveau_bo *mp_stack_bo; ++ ++ uint64_t tls_size; ++ ++ struct nouveau_resource *text_heap; ++ ++ struct { ++ struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; ++ uint8_t *buf; ++ int index; ++ uint32_t offset; ++ } scratch; ++ ++ struct { ++ void **entries; ++ int next; ++ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; ++ } tic; ++ ++ struct { ++ void **entries; ++ int next; ++ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; ++ } tsc; ++ ++ struct { ++ uint32_t *map; ++ struct nvc0_fence *head; ++ struct nvc0_fence *tail; ++ struct nvc0_fence *current; ++ uint32_t sequence; ++ uint32_t sequence_ack; ++ struct nouveau_bo *bo; ++ } fence; ++ ++ struct nvc0_mman *mm_GART; ++ struct nvc0_mman *mm_VRAM; ++ struct nvc0_mman *mm_VRAM_fe0; ++ ++ struct nouveau_grobj *fermi; ++ struct nouveau_grobj *eng2d; ++ struct 
nouveau_grobj *m2mf; ++}; ++ ++static INLINE struct nvc0_screen * ++nvc0_screen(struct pipe_screen *screen) ++{ ++ return (struct nvc0_screen *)screen; ++} ++ ++/* Since a resource can be migrated, we need to decouple allocations from ++ * them. This struct is linked with fences for delayed freeing of allocs. ++ */ ++struct nvc0_mm_allocation { ++ struct nvc0_mm_allocation *next; ++ void *priv; ++ uint32_t offset; ++}; ++ ++static INLINE void ++nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm) ++{ ++ mm->next = nf->buffers; ++ nf->buffers = mm; ++} ++ ++extern struct nvc0_mman * ++nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); ++ ++extern void ++nvc0_mm_destroy(struct nvc0_mman *); ++ ++extern struct nvc0_mm_allocation * ++nvc0_mm_allocate(struct nvc0_mman *, ++ uint32_t size, struct nouveau_bo **, uint32_t *offset); ++extern void ++nvc0_mm_free(struct nvc0_mm_allocation *); ++ ++void nvc0_screen_make_buffers_resident(struct nvc0_screen *); ++ ++int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); ++int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); ++ ++static INLINE void ++nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags) ++{ ++ struct nvc0_screen *screen = nvc0_screen(res->base.screen); ++ ++ if (res->mm) { ++ nvc0_fence_reference(&res->fence, screen->fence.current); ++ ++ if (flags & NOUVEAU_BO_WR) ++ nvc0_fence_reference(&res->fence_wr, screen->fence.current); ++ } ++} ++ ++static INLINE void ++nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) ++{ ++ struct nvc0_screen *screen = nvc0_screen(res->base.screen); ++ ++ nouveau_bo_validate(screen->base.channel, res->bo, flags); ++ ++ nvc0_resource_fence(res, flags); ++} ++ ++ ++boolean ++nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); ++ ++void ++nvc0_screen_fence_next(struct nvc0_screen *); ++ ++static INLINE boolean ++nvc0_screen_fence_emit(struct nvc0_screen *screen) ++{ ++ 
nvc0_fence_emit(screen->fence.current); ++ ++ return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); ++} ++ ++struct nvc0_format { ++ uint32_t rt; ++ uint32_t tic; ++ uint32_t vtx; ++ uint32_t usage; ++}; ++ ++extern const struct nvc0_format nvc0_format_table[]; ++ ++static INLINE void ++nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) ++{ ++ if (tic->id >= 0) ++ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); ++} ++ ++static INLINE void ++nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) ++{ ++ if (tsc->id >= 0) ++ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); ++} ++ ++static INLINE void ++nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic) ++{ ++ if (tic->id >= 0) { ++ screen->tic.entries[tic->id] = NULL; ++ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); ++ } ++} ++ ++static INLINE void ++nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc) ++{ ++ if (tsc->id >= 0) { ++ screen->tsc.entries[tsc->id] = NULL; ++ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); ++ } ++} ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c +new file mode 100644 +index 0000000..981b548 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c +@@ -0,0 +1,180 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * 
all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "pipe/p_context.h" ++#include "pipe/p_defines.h" ++#include "pipe/p_state.h" ++#include "util/u_inlines.h" ++ ++#include "nvc0_context.h" ++ ++static boolean ++nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) ++{ ++ int ret; ++ unsigned size; ++ ++ if (prog->translated) ++ return TRUE; ++ ++ prog->translated = nvc0_program_translate(prog); ++ if (!prog->translated) ++ return FALSE; ++ ++ size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); ++ ++ ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog, ++ &prog->res); ++ if (ret) ++ return FALSE; ++ ++ prog->code_base = prog->res->start; ++ ++ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, ++ prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr); ++ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM, ++ prog->code_base + NVC0_SHADER_HEADER_SIZE, ++ prog->code_size, prog->code); ++ ++ BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); ++ OUT_RING (nvc0->screen->base.channel, 0x1111); ++ ++ return TRUE; ++} ++ ++void ++nvc0_vertprog_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_program *vp = nvc0->vertprog; ++ ++ if (nvc0->clip.nr > vp->vp.num_ucps) { ++ assert(nvc0->clip.nr <= 6); ++ vp->vp.num_ucps = 6; ++ ++ if (vp->translated) ++ nvc0_program_destroy(nvc0, vp); ++ } ++ ++ if (!nvc0_program_validate(nvc0, vp)) ++ 
return; ++ ++ BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); ++ OUT_RING (chan, 0x11); ++ OUT_RING (chan, vp->code_base); ++ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1); ++ OUT_RING (chan, vp->max_gpr); ++ ++ // BEGIN_RING(chan, RING_3D_(0x163c), 1); ++ // OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1); ++ OUT_RING (chan, 1); ++} ++ ++void ++nvc0_fragprog_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_program *fp = nvc0->fragprog; ++ ++ if (!nvc0_program_validate(nvc0, fp)) ++ return; ++ ++ BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); ++ OUT_RING (chan, fp->fp.early_z); ++ BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2); ++ OUT_RING (chan, 0x51); ++ OUT_RING (chan, fp->code_base); ++ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1); ++ OUT_RING (chan, fp->max_gpr); ++ ++ BEGIN_RING(chan, RING_3D_(0x0360), 2); ++ OUT_RING (chan, 0x20164010); ++ OUT_RING (chan, 0x20); ++ BEGIN_RING(chan, RING_3D_(0x196c), 1); ++ OUT_RING (chan, fp->flags[0]); ++} ++ ++void ++nvc0_tctlprog_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_program *tp = nvc0->tctlprog; ++ ++ if (!tp) { ++ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1); ++ OUT_RING (chan, 0x20); ++ return; ++ } ++ if (!nvc0_program_validate(nvc0, tp)) ++ return; ++ ++ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); ++ OUT_RING (chan, 0x21); ++ OUT_RING (chan, tp->code_base); ++ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1); ++ OUT_RING (chan, tp->max_gpr); ++} ++ ++void ++nvc0_tevlprog_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_program *tp = nvc0->tevlprog; ++ ++ if (!tp) { ++ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); ++ OUT_RING (chan, 0x30); ++ return; ++ } ++ if (!nvc0_program_validate(nvc0, tp)) ++ return; ++ ++ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); ++ OUT_RING (chan, 0x31); ++ 
BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1); ++ OUT_RING (chan, tp->code_base); ++ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1); ++ OUT_RING (chan, tp->max_gpr); ++} ++ ++void ++nvc0_gmtyprog_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_program *gp = nvc0->gmtyprog; ++ ++ if (!gp) { ++ BEGIN_RING(chan, RING_3D(GP_SELECT), 1); ++ OUT_RING (chan, 0x40); ++ return; ++ } ++ if (!nvc0_program_validate(nvc0, gp)) ++ return; ++ ++ BEGIN_RING(chan, RING_3D(GP_SELECT), 1); ++ OUT_RING (chan, 0x41); ++ BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1); ++ OUT_RING (chan, gp->code_base); ++ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); ++ OUT_RING (chan, gp->max_gpr); ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c +new file mode 100644 +index 0000000..c08f369 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_state.c +@@ -0,0 +1,865 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "pipe/p_defines.h" ++#include "util/u_inlines.h" ++ ++#include "tgsi/tgsi_parse.h" ++ ++#include "nvc0_stateobj.h" ++#include "nvc0_context.h" ++ ++#include "nvc0_3d.xml.h" ++#include "nv50_texture.xml.h" ++ ++#include "nouveau/nouveau_gldefs.h" ++ ++static INLINE uint32_t ++nvc0_colormask(unsigned mask) ++{ ++ uint32_t ret = 0; ++ ++ if (mask & PIPE_MASK_R) ++ ret |= 0x0001; ++ if (mask & PIPE_MASK_G) ++ ret |= 0x0010; ++ if (mask & PIPE_MASK_B) ++ ret |= 0x0100; ++ if (mask & PIPE_MASK_A) ++ ret |= 0x1000; ++ ++ return ret; ++} ++ ++static INLINE uint32_t ++nvc0_blend_fac(unsigned factor) ++{ ++ static const uint16_t bf[] = { ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ ++ NV50_3D_BLEND_FACTOR_ONE, ++ NV50_3D_BLEND_FACTOR_SRC_COLOR, ++ NV50_3D_BLEND_FACTOR_SRC_ALPHA, ++ NV50_3D_BLEND_FACTOR_DST_ALPHA, ++ NV50_3D_BLEND_FACTOR_DST_COLOR, ++ NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, ++ NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, ++ NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, ++ NV50_3D_BLEND_FACTOR_SRC1_COLOR, ++ NV50_3D_BLEND_FACTOR_SRC1_ALPHA, ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ ++ NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, ++ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA ++ }; ++ ++ 
assert(factor < (sizeof(bf) / sizeof(bf[0]))); ++ return bf[factor]; ++} ++ ++static void * ++nvc0_blend_state_create(struct pipe_context *pipe, ++ const struct pipe_blend_state *cso) ++{ ++ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); ++ int i; ++ ++ so->pipe = *cso; ++ ++ SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable); ++ ++ if (!cso->independent_blend_enable) { ++ SB_BEGIN_3D(so, BLEND_ENABLES, 1); ++ SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0); ++ ++ if (cso->rt[0].blend_enable) { ++ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); ++ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor)); ++ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor)); ++ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor)); ++ } ++ ++ SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1); ++ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); ++ } else { ++ uint8_t en = 0; ++ ++ for (i = 0; i < 8; ++i) { ++ if (!cso->rt[i].blend_enable) ++ continue; ++ en |= 1 << i; ++ ++ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); ++ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); ++ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); ++ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); ++ } ++ SB_BEGIN_3D(so, BLEND_ENABLES, 1); ++ SB_DATA (so, en); ++ ++ SB_BEGIN_3D(so, COLOR_MASK(0), 8); ++ for (i = 0; i < 8; ++i) ++ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); ++ } ++ ++ if (cso->logicop_enable) { ++ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); ++ SB_DATA (so, 1); ++ SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); ++ } else { ++ SB_IMMED_3D(so, 
LOGIC_OP_ENABLE, 0); ++ } ++ ++ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); ++ return so; ++} ++ ++static void ++nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->blend = hwcso; ++ nvc0->dirty |= NVC0_NEW_BLEND; ++} ++ ++static void ++nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) ++{ ++ FREE(hwcso); ++} ++ ++static void * ++nvc0_rasterizer_state_create(struct pipe_context *pipe, ++ const struct pipe_rasterizer_state *cso) ++{ ++ struct nvc0_rasterizer_stateobj *so; ++ ++ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); ++ if (!so) ++ return NULL; ++ so->pipe = *cso; ++ ++#ifndef NVC0_SCISSORS_CLIPPING ++ SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); ++#endif ++ ++ SB_BEGIN_3D(so, SHADE_MODEL, 1); ++ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : ++ NVC0_3D_SHADE_MODEL_SMOOTH); ++ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); ++ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); ++ ++ SB_BEGIN_3D(so, LINE_WIDTH, 1); ++ SB_DATA (so, fui(cso->line_width)); ++ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); ++ ++ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); ++ if (cso->line_stipple_enable) { ++ SB_DATA (so, 1); ++ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); ++ SB_DATA (so, (cso->line_stipple_pattern << 8) | ++ cso->line_stipple_factor); ++ ++ } else { ++ SB_DATA (so, 0); ++ } ++ ++ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); ++ if (!cso->point_size_per_vertex) { ++ SB_BEGIN_3D(so, POINT_SIZE, 1); ++ SB_DATA (so, fui(cso->point_size)); ++ } ++ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); ++ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); ++ ++ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1); ++ SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); ++ SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1); ++ SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); ++ SB_IMMED_3D(so, 
POLYGON_SMOOTH_ENABLE, cso->poly_smooth); ++ ++ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); ++ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); ++ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW : ++ NVC0_3D_FRONT_FACE_CW); ++ switch (cso->cull_face) { ++ case PIPE_FACE_FRONT_AND_BACK: ++ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); ++ break; ++ case PIPE_FACE_FRONT: ++ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); ++ break; ++ case PIPE_FACE_BACK: ++ default: ++ SB_DATA(so, NVC0_3D_CULL_FACE_BACK); ++ break; ++ } ++ ++ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); ++ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); ++ SB_DATA (so, cso->offset_point); ++ SB_DATA (so, cso->offset_line); ++ SB_DATA (so, cso->offset_tri); ++ ++ if (cso->offset_point || cso->offset_line || cso->offset_tri) { ++ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); ++ SB_DATA (so, fui(cso->offset_scale)); ++ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); ++ SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? 
*/ ++ } ++ ++ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); ++ return (void *)so; ++} ++ ++static void ++nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->rast = hwcso; ++ nvc0->dirty |= NVC0_NEW_RASTERIZER; ++} ++ ++static void ++nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) ++{ ++ FREE(hwcso); ++} ++ ++static void * ++nvc0_zsa_state_create(struct pipe_context *pipe, ++ const struct pipe_depth_stencil_alpha_state *cso) ++{ ++ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); ++ ++ so->pipe = *cso; ++ ++ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); ++ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); ++ if (cso->depth.enabled) { ++ SB_DATA (so, 1); ++ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); ++ SB_DATA (so, nvgl_comparison_op(cso->depth.func)); ++ } else { ++ SB_DATA (so, 0); ++ } ++ ++ if (cso->stencil[0].enabled) { ++ SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); ++ SB_DATA (so, 1); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); ++ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); ++ SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); ++ SB_DATA (so, cso->stencil[0].writemask); ++ SB_DATA (so, cso->stencil[0].valuemask); ++ } else { ++ SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); ++ } ++ ++ if (cso->stencil[1].enabled) { ++ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); ++ SB_DATA (so, 1); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); ++ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); ++ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); ++ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); ++ SB_DATA (so, cso->stencil[1].writemask); ++ SB_DATA (so, cso->stencil[1].valuemask); ++ } else { ++ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); ++ 
} ++ ++ SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); ++ if (cso->alpha.enabled) { ++ SB_DATA (so, 1); ++ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); ++ SB_DATA (so, fui(cso->alpha.ref_value)); ++ SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); ++ } else { ++ SB_DATA (so, 0); ++ } ++ ++ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); ++ return (void *)so; ++} ++ ++static void ++nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->zsa = hwcso; ++ nvc0->dirty |= NVC0_NEW_ZSA; ++} ++ ++static void ++nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) ++{ ++ FREE(hwcso); ++} ++ ++/* ====================== SAMPLERS AND TEXTURES ================================ ++ */ ++ ++#define NV50_TSC_WRAP_CASE(n) \ ++ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n ++ ++static INLINE unsigned ++nv50_tsc_wrap_mode(unsigned wrap) ++{ ++ switch (wrap) { ++ NV50_TSC_WRAP_CASE(REPEAT); ++ NV50_TSC_WRAP_CASE(MIRROR_REPEAT); ++ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); ++ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); ++ NV50_TSC_WRAP_CASE(CLAMP); ++ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); ++ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); ++ NV50_TSC_WRAP_CASE(MIRROR_CLAMP); ++ default: ++ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); ++ return NV50_TSC_WRAP_REPEAT; ++ } ++} ++ ++static void * ++nvc0_sampler_state_create(struct pipe_context *pipe, ++ const struct pipe_sampler_state *cso) ++{ ++ struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry); ++ float f[2]; ++ ++ so->id = -1; ++ ++ so->tsc[0] = (0x00026000 | ++ (nv50_tsc_wrap_mode(cso->wrap_s) << 0) | ++ (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | ++ (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); ++ ++ switch (cso->mag_img_filter) { ++ case PIPE_TEX_FILTER_LINEAR: ++ so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; ++ break; ++ case PIPE_TEX_FILTER_NEAREST: ++ default: ++ so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST; ++ break; ++ } ++ ++ switch (cso->min_img_filter) { ++ case 
PIPE_TEX_FILTER_LINEAR: ++ so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; ++ break; ++ case PIPE_TEX_FILTER_NEAREST: ++ default: ++ so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; ++ break; ++ } ++ ++ switch (cso->min_mip_filter) { ++ case PIPE_TEX_MIPFILTER_LINEAR: ++ so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; ++ break; ++ case PIPE_TEX_MIPFILTER_NEAREST: ++ so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; ++ break; ++ case PIPE_TEX_MIPFILTER_NONE: ++ default: ++ so->tsc[1] |= NV50_TSC_1_MIPF_NONE; ++ break; ++ } ++ ++ if (cso->max_anisotropy >= 16) ++ so->tsc[0] |= (7 << 20); ++ else ++ if (cso->max_anisotropy >= 12) ++ so->tsc[0] |= (6 << 20); ++ else { ++ so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; ++ ++ if (cso->max_anisotropy >= 4) ++ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; ++ else ++ if (cso->max_anisotropy >= 2) ++ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; ++ } ++ ++ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { ++ /* NOTE: must be deactivated for non-shadow textures */ ++ so->tsc[0] |= (1 << 9); ++ so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; ++ } ++ ++ f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f); ++ so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; ++ ++ f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f); ++ f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f); ++ so->tsc[2] |= ++ (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); ++ ++ so->tsc[4] = fui(cso->border_color[0]); ++ so->tsc[5] = fui(cso->border_color[1]); ++ so->tsc[6] = fui(cso->border_color[2]); ++ so->tsc[7] = fui(cso->border_color[3]); ++ ++ return (void *)so; ++} ++ ++static void ++nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) ++{ ++ unsigned s, i; ++ ++ for (s = 0; s < 5; ++s) ++ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) ++ if (nvc0_context(pipe)->samplers[s][i] == hwcso) ++ nvc0_context(pipe)->samplers[s][i] = NULL; ++ ++ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso)); ++ ++ FREE(hwcso); ++} ++ ++static INLINE void 
++nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, ++ unsigned nr, void **hwcso) ++{ ++ unsigned i; ++ ++ for (i = 0; i < nr; ++i) { ++ struct nvc0_tsc_entry *old = nvc0->samplers[s][i]; ++ ++ nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]); ++ if (old) ++ nvc0_screen_tsc_unlock(nvc0->screen, old); ++ } ++ for (; i < nvc0->num_samplers[s]; ++i) ++ if (nvc0->samplers[s][i]) ++ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); ++ ++ nvc0->num_samplers[s] = nr; ++ ++ nvc0->dirty |= NVC0_NEW_SAMPLERS; ++} ++ ++static void ++nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) ++{ ++ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); ++} ++ ++static void ++nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) ++{ ++ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); ++} ++ ++static void ++nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) ++{ ++ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); ++} ++ ++/* NOTE: only called when not referenced anywhere, won't be bound */ ++static void ++nvc0_sampler_view_destroy(struct pipe_context *pipe, ++ struct pipe_sampler_view *view) ++{ ++ pipe_resource_reference(&view->texture, NULL); ++ ++ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view)); ++ ++ FREE(nvc0_tic_entry(view)); ++} ++ ++static INLINE void ++nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, ++ unsigned nr, ++ struct pipe_sampler_view **views) ++{ ++ unsigned i; ++ ++ for (i = 0; i < nr; ++i) { ++ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); ++ if (old) ++ nvc0_screen_tic_unlock(nvc0->screen, old); ++ ++ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); ++ } ++ ++ for (i = nr; i < nvc0->num_textures[s]; ++i) { ++ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]); ++ if (!old) ++ continue; ++ nvc0_screen_tic_unlock(nvc0->screen, old); ++ ++ 
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); ++ } ++ ++ nvc0->num_textures[s] = nr; ++ ++ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); ++ ++ nvc0->dirty |= NVC0_NEW_TEXTURES; ++} ++ ++static void ++nvc0_vp_set_sampler_views(struct pipe_context *pipe, ++ unsigned nr, ++ struct pipe_sampler_view **views) ++{ ++ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); ++} ++ ++static void ++nvc0_fp_set_sampler_views(struct pipe_context *pipe, ++ unsigned nr, ++ struct pipe_sampler_view **views) ++{ ++ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); ++} ++ ++static void ++nvc0_gp_set_sampler_views(struct pipe_context *pipe, ++ unsigned nr, ++ struct pipe_sampler_view **views) ++{ ++ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); ++} ++ ++/* ============================= SHADERS ======================================= ++ */ ++ ++static void * ++nvc0_sp_state_create(struct pipe_context *pipe, ++ const struct pipe_shader_state *cso, unsigned type) ++{ ++ struct nvc0_program *prog; ++ ++ prog = CALLOC_STRUCT(nvc0_program); ++ if (!prog) ++ return NULL; ++ ++ prog->type = type; ++ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); ++ ++ return (void *)prog; ++} ++ ++static void ++nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_program *prog = (struct nvc0_program *)hwcso; ++ ++ nvc0_program_destroy(nvc0_context(pipe), prog); ++ ++ FREE((void *)prog->pipe.tokens); ++ FREE(prog); ++} ++ ++static void * ++nvc0_vp_state_create(struct pipe_context *pipe, ++ const struct pipe_shader_state *cso) ++{ ++ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); ++} ++ ++static void ++nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->vertprog = hwcso; ++ nvc0->dirty |= NVC0_NEW_VERTPROG; ++} ++ ++static void * ++nvc0_fp_state_create(struct pipe_context *pipe, ++ const struct pipe_shader_state *cso) ++{ ++ return 
nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); ++} ++ ++static void ++nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->fragprog = hwcso; ++ nvc0->dirty |= NVC0_NEW_FRAGPROG; ++} ++ ++static void * ++nvc0_gp_state_create(struct pipe_context *pipe, ++ const struct pipe_shader_state *cso) ++{ ++ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); ++} ++ ++static void ++nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->gmtyprog = hwcso; ++ nvc0->dirty |= NVC0_NEW_GMTYPROG; ++} ++ ++static void ++nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, ++ struct pipe_resource *res) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ switch (shader) { ++ case PIPE_SHADER_VERTEX: shader = 0; break; ++ /* ++ case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break; ++ case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break; ++ */ ++ case PIPE_SHADER_GEOMETRY: shader = 3; break; ++ case PIPE_SHADER_FRAGMENT: shader = 4; break; ++ default: ++ assert(0); ++ break; ++ } ++ ++ if (nvc0->constbuf[shader][index]) ++ nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT, ++ nvc0_resource( ++ nvc0->constbuf[shader][index])); ++ ++ pipe_resource_reference(&nvc0->constbuf[shader][index], res); ++ ++ nvc0->constbuf_dirty[shader] |= 1 << index; ++ ++ nvc0->dirty |= NVC0_NEW_CONSTBUF; ++} ++ ++/* ============================================================================= ++ */ ++ ++static void ++nvc0_set_blend_color(struct pipe_context *pipe, ++ const struct pipe_blend_color *bcol) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->blend_colour = *bcol; ++ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; ++} ++ ++static void ++nvc0_set_stencil_ref(struct pipe_context *pipe, ++ const struct pipe_stencil_ref *sr) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ 
nvc0->stencil_ref = *sr; ++ nvc0->dirty |= NVC0_NEW_STENCIL_REF; ++} ++ ++static void ++nvc0_set_clip_state(struct pipe_context *pipe, ++ const struct pipe_clip_state *clip) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ const unsigned size = clip->nr * sizeof(clip->ucp[0]); ++ ++ memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size); ++ nvc0->clip.nr = clip->nr; ++ ++ nvc0->clip.depth_clamp = clip->depth_clamp; ++ ++ nvc0->dirty |= NVC0_NEW_CLIP; ++} ++ ++static void ++nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->sample_mask = sample_mask; ++ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; ++} ++ ++ ++static void ++nvc0_set_framebuffer_state(struct pipe_context *pipe, ++ const struct pipe_framebuffer_state *fb) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->framebuffer = *fb; ++ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; ++} ++ ++static void ++nvc0_set_polygon_stipple(struct pipe_context *pipe, ++ const struct pipe_poly_stipple *stipple) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->stipple = *stipple; ++ nvc0->dirty |= NVC0_NEW_STIPPLE; ++} ++ ++static void ++nvc0_set_scissor_state(struct pipe_context *pipe, ++ const struct pipe_scissor_state *scissor) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->scissor = *scissor; ++ nvc0->dirty |= NVC0_NEW_SCISSOR; ++} ++ ++static void ++nvc0_set_viewport_state(struct pipe_context *pipe, ++ const struct pipe_viewport_state *vpt) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->viewport = *vpt; ++ nvc0->dirty |= NVC0_NEW_VIEWPORT; ++} ++ ++static void ++nvc0_set_vertex_buffers(struct pipe_context *pipe, ++ unsigned count, ++ const struct pipe_vertex_buffer *vb) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ unsigned i; ++ ++ for (i = 0; i < count; ++i) ++ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); ++ for (; i < nvc0->num_vtxbufs; 
++i) ++ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); ++ ++ memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); ++ nvc0->num_vtxbufs = count; ++ ++ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); ++ ++ nvc0->dirty |= NVC0_NEW_ARRAYS; ++} ++ ++static void ++nvc0_set_index_buffer(struct pipe_context *pipe, ++ const struct pipe_index_buffer *ib) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ if (ib) ++ memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf)); ++ else ++ nvc0->idxbuf.buffer = NULL; ++} ++ ++static void ++nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ ++ nvc0->vertex = hwcso; ++ nvc0->dirty |= NVC0_NEW_VERTEX; ++} ++ ++void ++nvc0_init_state_functions(struct nvc0_context *nvc0) ++{ ++ nvc0->pipe.create_blend_state = nvc0_blend_state_create; ++ nvc0->pipe.bind_blend_state = nvc0_blend_state_bind; ++ nvc0->pipe.delete_blend_state = nvc0_blend_state_delete; ++ ++ nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create; ++ nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind; ++ nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete; ++ ++ nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create; ++ nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind; ++ nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete; ++ ++ nvc0->pipe.create_sampler_state = nvc0_sampler_state_create; ++ nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete; ++ nvc0->pipe.bind_vertex_sampler_states = nvc0_vp_sampler_states_bind; ++ nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind; ++ nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind; ++ ++ nvc0->pipe.create_sampler_view = nvc0_create_sampler_view; ++ nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy; ++ nvc0->pipe.set_vertex_sampler_views = nvc0_vp_set_sampler_views; ++ nvc0->pipe.set_fragment_sampler_views = 
nvc0_fp_set_sampler_views; ++ nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views; ++ ++ nvc0->pipe.create_vs_state = nvc0_vp_state_create; ++ nvc0->pipe.create_fs_state = nvc0_fp_state_create; ++ nvc0->pipe.create_gs_state = nvc0_gp_state_create; ++ nvc0->pipe.bind_vs_state = nvc0_vp_state_bind; ++ nvc0->pipe.bind_fs_state = nvc0_fp_state_bind; ++ nvc0->pipe.bind_gs_state = nvc0_gp_state_bind; ++ nvc0->pipe.delete_vs_state = nvc0_sp_state_delete; ++ nvc0->pipe.delete_fs_state = nvc0_sp_state_delete; ++ nvc0->pipe.delete_gs_state = nvc0_sp_state_delete; ++ ++ nvc0->pipe.set_blend_color = nvc0_set_blend_color; ++ nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref; ++ nvc0->pipe.set_clip_state = nvc0_set_clip_state; ++ nvc0->pipe.set_sample_mask = nvc0_set_sample_mask; ++ nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer; ++ nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state; ++ nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple; ++ nvc0->pipe.set_scissor_state = nvc0_set_scissor_state; ++ nvc0->pipe.set_viewport_state = nvc0_set_viewport_state; ++ ++ nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create; ++ nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete; ++ nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind; ++ ++ nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; ++ nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; ++} ++ +diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c +new file mode 100644 +index 0000000..25aec02 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c +@@ -0,0 +1,430 @@ ++ ++#include "nvc0_context.h" ++#include "os/os_time.h" ++ ++static void ++nvc0_validate_zcull(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct pipe_framebuffer_state *fb = &nvc0->framebuffer; ++ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); ++ struct 
nvc0_miptree *mt = nvc0_miptree(sf->base.texture); ++ struct nouveau_bo *bo = mt->base.bo; ++ uint32_t size; ++ uint32_t offset = align(mt->total_size, 1 << 17); ++ unsigned width, height; ++ ++ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); ++ ++ size = mt->total_size * 2; ++ ++ height = align(fb->height, 32); ++ width = fb->width % 224; ++ if (width) ++ width = fb->width + (224 - width); ++ else ++ width = fb->width; ++ ++ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ ++ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ offset += 1 << 17; ++ BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */ ++ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ BEGIN_RING(chan, RING_3D_(0x07e0), 2); ++ OUT_RING (chan, size); ++ OUT_RING (chan, size >> 16); ++ BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */ ++ OUT_RING (chan, 2); ++ BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */ ++ OUT_RING (chan, width); ++ OUT_RING (chan, height); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D_(0x15fc), 2); ++ OUT_RING (chan, 0); /* bits 0xffff */ ++ OUT_RING (chan, 0); /* bits 0xffff */ ++ BEGIN_RING(chan, RING_3D_(0x1958), 1); ++ OUT_RING (chan, 0); /* bits ~0 */ ++} ++ ++static void ++nvc0_validate_fb(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct pipe_framebuffer_state *fb = &nvc0->framebuffer; ++ unsigned i; ++ ++ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); ++ ++ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); ++ OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs); ++ BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2); ++ OUT_RING (chan, fb->width << 16); ++ OUT_RING (chan, 
fb->height << 16); ++ ++ for (i = 0; i < fb->nr_cbufs; ++i) { ++ struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); ++ struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); ++ struct nouveau_bo *bo = mt->base.bo; ++ uint32_t offset = sf->offset; ++ ++ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); ++ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, sf->width); ++ OUT_RING (chan, sf->height); ++ OUT_RING (chan, nvc0_format_table[sf->base.format].rt); ++ OUT_RING (chan, (mt->layout_3d << 16) | ++ mt->level[sf->base.u.tex.level].tile_mode); ++ OUT_RING (chan, sf->depth); ++ OUT_RING (chan, mt->layer_stride >> 2); ++ ++ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, ++ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ } ++ ++ if (fb->zsbuf) { ++ struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); ++ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); ++ struct nouveau_bo *bo = mt->base.bo; ++ int unk = mt->base.base.target == PIPE_TEXTURE_2D; ++ uint32_t offset = sf->offset; ++ ++ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); ++ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt); ++ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); ++ OUT_RING (chan, mt->layer_stride >> 2); ++ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); ++ OUT_RING (chan, sf->width); ++ OUT_RING (chan, sf->height); ++ OUT_RING (chan, (unk << 16) | sf->depth); ++ ++ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, ++ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); ++ } else { ++ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); ++ OUT_RING (chan, 0); ++ } ++ ++#ifndef NVC0_SCISSORS_CLIPPING ++ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); ++ OUT_RING (chan, 
fb->width << 16); ++ OUT_RING (chan, fb->height << 16); ++#endif ++} ++ ++static void ++nvc0_validate_blend_colour(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4); ++ OUT_RINGf (chan, nvc0->blend_colour.color[0]); ++ OUT_RINGf (chan, nvc0->blend_colour.color[1]); ++ OUT_RINGf (chan, nvc0->blend_colour.color[2]); ++ OUT_RINGf (chan, nvc0->blend_colour.color[3]); ++} ++ ++static void ++nvc0_validate_stencil_ref(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1); ++ OUT_RING (chan, nvc0->stencil_ref.ref_value[0]); ++ BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1); ++ OUT_RING (chan, nvc0->stencil_ref.ref_value[1]); ++} ++ ++static void ++nvc0_validate_stipple(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ unsigned i; ++ ++ BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32); ++ for (i = 0; i < 32; ++i) ++ OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i])); ++} ++ ++static void ++nvc0_validate_scissor(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct pipe_scissor_state *s = &nvc0->scissor; ++#ifdef NVC0_SCISSORS_CLIPPING ++ struct pipe_viewport_state *vp = &nvc0->viewport; ++ int minx, maxx, miny, maxy; ++ ++ if (!(nvc0->dirty & ++ (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && ++ nvc0->state.scissor == nvc0->rast->pipe.scissor) ++ return; ++ nvc0->state.scissor = nvc0->rast->pipe.scissor; ++ ++ if (nvc0->state.scissor) { ++ minx = s->minx; ++ maxx = s->maxx; ++ miny = s->miny; ++ maxy = s->maxy; ++ } else { ++ minx = 0; ++ maxx = nvc0->framebuffer.width; ++ miny = 0; ++ maxy = nvc0->framebuffer.height; ++ } ++ ++ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); ++ maxx = MIN2(maxx, (int)(vp->translate[0] + 
fabsf(vp->scale[0]))); ++ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); ++ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); ++ ++ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); ++ OUT_RING (chan, (maxx << 16) | minx); ++ OUT_RING (chan, (maxy << 16) | miny); ++ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); ++ OUT_RING (chan, ((maxx - minx) << 16) | minx); ++ OUT_RING (chan, ((maxy - miny) << 16) | miny); ++#else ++ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); ++ OUT_RING (chan, (s->maxx << 16) | s->minx); ++ OUT_RING (chan, (s->maxy << 16) | s->miny); ++#endif ++} ++ ++static void ++nvc0_validate_viewport(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); ++ OUT_RINGf (chan, nvc0->viewport.translate[0]); ++ OUT_RINGf (chan, nvc0->viewport.translate[1]); ++ OUT_RINGf (chan, nvc0->viewport.translate[2]); ++ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); ++ OUT_RINGf (chan, nvc0->viewport.scale[0]); ++ OUT_RINGf (chan, nvc0->viewport.scale[1]); ++ OUT_RINGf (chan, nvc0->viewport.scale[2]); ++ ++#ifdef NVC0_SCISSORS_CLIPPING ++ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); ++ OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); ++ OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); ++#endif ++} ++ ++static void ++nvc0_validate_clip(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ uint32_t clip; ++ ++ clip = nvc0->clip.depth_clamp ? 
0x201a : 0x0002; ++#ifndef NVC0_SCISSORS_CLIPPING ++ clip |= 0x1080; ++#endif ++ ++ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); ++ OUT_RING (chan, clip); ++ ++ if (nvc0->clip.nr) { ++ struct nouveau_bo *bo = nvc0->screen->uniforms; ++ ++ BEGIN_RING(chan, RING_3D(CB_SIZE), 3); ++ OUT_RING (chan, 256); ++ OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1); ++ OUT_RING (chan, 0); ++ OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4); ++ ++ BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1); ++ OUT_RING (chan, (1 << nvc0->clip.nr) - 1); ++ } else { ++ IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0); ++ } ++} ++ ++static void ++nvc0_validate_blend(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ WAIT_RING(chan, nvc0->blend->size); ++ OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size); ++} ++ ++static void ++nvc0_validate_zsa(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ WAIT_RING(chan, nvc0->zsa->size); ++ OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size); ++} ++ ++static void ++nvc0_validate_rasterizer(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ WAIT_RING(chan, nvc0->rast->size); ++ OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size); ++} ++ ++static void ++nvc0_constbufs_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nouveau_bo *bo; ++ unsigned s; ++ ++ for (s = 0; s < 5; ++s) { ++ struct nvc0_resource *res; ++ int i; ++ ++ while (nvc0->constbuf_dirty[s]) { ++ unsigned base = 0; ++ unsigned offset = 0, words = 0; ++ boolean rebind = TRUE; ++ ++ i = ffs(nvc0->constbuf_dirty[s]) - 1; ++ nvc0->constbuf_dirty[s] &= ~(1 << i); ++ ++ res = nvc0_resource(nvc0->constbuf[s][i]); ++ if 
(!res) { ++ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); ++ OUT_RING (chan, (i << 4) | 0); ++ if (i == 0) ++ nvc0->state.uniform_buffer_bound[s] = 0; ++ continue; ++ } ++ ++ if (!nvc0_resource_mapped_by_gpu(&res->base)) { ++ if (i == 0) { ++ base = s << 16; ++ bo = nvc0->screen->uniforms; ++ ++ if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) ++ rebind = FALSE; ++ else ++ nvc0->state.uniform_buffer_bound[s] = ++ align(res->base.width0, 0x100); ++ } else { ++ bo = res->bo; ++ } ++#if 0 ++ nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM, ++ base, res->base.width0, res->data); ++ BEGIN_RING(chan, RING_3D_(0x021c), 1); ++ OUT_RING (chan, 0x1111); ++#else ++ words = res->base.width0 / 4; ++#endif ++ } else { ++ bo = res->bo; ++ if (i == 0) ++ nvc0->state.uniform_buffer_bound[s] = 0; ++ } ++ ++ if (bo != nvc0->screen->uniforms) ++ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res, ++ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ ++ if (rebind) { ++ BEGIN_RING(chan, RING_3D(CB_SIZE), 3); ++ OUT_RING (chan, align(res->base.width0, 0x100)); ++ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1); ++ OUT_RING (chan, (i << 4) | 1); ++ } ++ ++ while (words) { ++ unsigned nr = AVAIL_RING(chan); ++ ++ if (nr < 16) { ++ FIRE_RING(chan); ++ continue; ++ } ++ nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); ++ ++ BEGIN_RING(chan, RING_3D(CB_SIZE), 3); ++ OUT_RING (chan, align(res->base.width0, 0x100)); ++ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1); ++ OUT_RING (chan, offset); ++ OUT_RINGp (chan, &res->data[offset], nr); ++ ++ offset += nr * 4; ++ words -= nr; ++ } ++ } ++ } ++} ++ ++static struct state_validate { ++ void (*func)(struct nvc0_context *); ++ uint32_t states; ++} validate_list[] = { ++ { nvc0_validate_fb, 
NVC0_NEW_FRAMEBUFFER }, ++ { nvc0_validate_blend, NVC0_NEW_BLEND }, ++ { nvc0_validate_zsa, NVC0_NEW_ZSA }, ++ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, ++ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, ++ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, ++ { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, ++#ifdef NVC0_SCISSORS_CLIPPING ++ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | ++ NVC0_NEW_RASTERIZER | ++ NVC0_NEW_FRAMEBUFFER }, ++#else ++ { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, ++#endif ++ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, ++ { nvc0_validate_clip, NVC0_NEW_CLIP }, ++ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, ++ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, ++ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, ++ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, ++ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, ++ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, ++ { nvc0_validate_textures, NVC0_NEW_TEXTURES }, ++ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, ++ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } ++}; ++#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) ++ ++boolean ++nvc0_state_validate(struct nvc0_context *nvc0) ++{ ++ unsigned i; ++#if 0 ++ if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */ ++ nvc0->dirty = 0xffffffff; ++#endif ++ nvc0->screen->cur_ctx = nvc0; ++ ++ if (nvc0->dirty) { ++ for (i = 0; i < validate_list_len; ++i) { ++ struct state_validate *validate = &validate_list[i]; ++ ++ if (nvc0->dirty & validate->states) ++ validate->func(nvc0); ++ } ++ nvc0->dirty = 0; ++ } ++ ++ nvc0_bufctx_emit_relocs(nvc0); ++ ++ return TRUE; ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h +new file mode 100644 +index 0000000..6c8028a +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h +@@ -0,0 +1,82 @@ ++ ++#ifndef __NVC0_STATEOBJ_H__ ++#define __NVC0_STATEOBJ_H__ ++ 
++#include "pipe/p_state.h" ++ ++#define NVC0_SCISSORS_CLIPPING ++ ++#define SB_BEGIN_3D(so, m, s) \ ++ (so)->state[(so)->size++] = \ ++ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) ++ ++#define SB_IMMED_3D(so, m, d) \ ++ (so)->state[(so)->size++] = \ ++ (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) ++ ++#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) ++ ++struct nvc0_blend_stateobj { ++ struct pipe_blend_state pipe; ++ int size; ++ uint32_t state[72]; ++}; ++ ++struct nvc0_tsc_entry { ++ int id; ++ uint32_t tsc[8]; ++}; ++ ++static INLINE struct nvc0_tsc_entry * ++nvc0_tsc_entry(void *hwcso) ++{ ++ return (struct nvc0_tsc_entry *)hwcso; ++} ++ ++struct nvc0_tic_entry { ++ struct pipe_sampler_view pipe; ++ int id; ++ uint32_t tic[8]; ++}; ++ ++static INLINE struct nvc0_tic_entry * ++nvc0_tic_entry(struct pipe_sampler_view *view) ++{ ++ return (struct nvc0_tic_entry *)view; ++} ++ ++struct nvc0_rasterizer_stateobj { ++ struct pipe_rasterizer_state pipe; ++ int size; ++ uint32_t state[36]; ++}; ++ ++struct nvc0_zsa_stateobj { ++ struct pipe_depth_stencil_alpha_state pipe; ++ int size; ++ uint32_t state[29]; ++}; ++ ++struct nvc0_vertex_element { ++ struct pipe_vertex_element pipe; ++ uint32_t state; ++}; ++ ++struct nvc0_vertex_stateobj { ++ struct translate *translate; ++ unsigned num_elements; ++ uint32_t instance_elts; ++ uint32_t instance_bufs; ++ unsigned vtx_size; ++ unsigned vtx_per_packet_max; ++ struct nvc0_vertex_element element[1]; ++}; ++ ++/* will have to lookup index -> location qualifier from nvc0_program */ ++struct nvc0_tfb_state { ++ uint8_t varying_count[4]; ++ uint32_t stride[4]; ++ uint8_t varying_indices[1]; ++}; ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c +new file mode 100644 +index 0000000..cc0a656 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_surface.c +@@ -0,0 +1,377 @@ ++/* ++ * Copyright 2008 Ben Skeggs ++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include ++ ++#include "pipe/p_defines.h" ++ ++#include "util/u_inlines.h" ++#include "util/u_pack_color.h" ++#include "util/u_format.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++ ++#include "nv50_defs.xml.h" ++ ++/* return TRUE for formats that can be converted among each other by NVC0_2D */ ++static INLINE boolean ++nvc0_2d_format_faithful(enum pipe_format format) ++{ ++ switch (format) { ++ case PIPE_FORMAT_B8G8R8A8_UNORM: ++ case PIPE_FORMAT_B8G8R8X8_UNORM: ++ case PIPE_FORMAT_B8G8R8A8_SRGB: ++ case PIPE_FORMAT_B8G8R8X8_SRGB: ++ case PIPE_FORMAT_B5G6R5_UNORM: ++ case PIPE_FORMAT_B5G5R5A1_UNORM: ++ case PIPE_FORMAT_B10G10R10A2_UNORM: ++ case PIPE_FORMAT_R8_UNORM: ++ case PIPE_FORMAT_R32G32B32A32_FLOAT: ++ case PIPE_FORMAT_R32G32B32_FLOAT: ++ return TRUE; ++ default: ++ return FALSE; ++ } ++} ++ ++static INLINE uint8_t ++nvc0_2d_format(enum pipe_format format) ++{ ++ uint8_t id = nvc0_format_table[format].rt; ++ ++ /* Hardware values for color formats range from 0xc0 to 0xff, ++ * but the 2D engine doesn't support all of them. ++ */ ++ if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) ++ return id; ++ ++ switch (util_format_get_blocksize(format)) { ++ case 1: ++ return NV50_SURFACE_FORMAT_R8_UNORM; ++ case 2: ++ return NV50_SURFACE_FORMAT_R16_UNORM; ++ case 4: ++ return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; ++ default: ++ return 0; ++ } ++} ++ ++static int ++nvc0_2d_texture_set(struct nouveau_channel *chan, int dst, ++ struct nvc0_miptree *mt, unsigned level, unsigned layer) ++{ ++ struct nouveau_bo *bo = mt->base.bo; ++ uint32_t width, height, depth; ++ uint32_t format; ++ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; ++ uint32_t flags = mt->base.domain | (dst ? 
NOUVEAU_BO_WR : NOUVEAU_BO_RD); ++ uint32_t offset = mt->level[level].offset; ++ ++ format = nvc0_2d_format(mt->base.base.format); ++ if (!format) { ++ NOUVEAU_ERR("invalid/unsupported surface format: %s\n", ++ util_format_name(mt->base.base.format)); ++ return 1; ++ } ++ ++ width = u_minify(mt->base.base.width0, level); ++ height = u_minify(mt->base.base.height0, level); ++ ++ offset = mt->level[level].offset; ++ if (!mt->layout_3d) { ++ offset += mt->layer_stride * layer; ++ depth = 1; ++ layer = 0; ++ } else { ++ depth = u_minify(mt->base.base.depth0, level); ++ } ++ ++ if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) { ++ BEGIN_RING(chan, RING_2D_(mthd), 2); ++ OUT_RING (chan, format); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5); ++ OUT_RING (chan, mt->level[level].pitch); ++ OUT_RING (chan, width); ++ OUT_RING (chan, height); ++ OUT_RELOCh(chan, bo, offset, flags); ++ OUT_RELOCl(chan, bo, offset, flags); ++ } else { ++ BEGIN_RING(chan, RING_2D_(mthd), 5); ++ OUT_RING (chan, format); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, mt->level[level].tile_mode); ++ OUT_RING (chan, depth); ++ OUT_RING (chan, layer); ++ BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4); ++ OUT_RING (chan, width); ++ OUT_RING (chan, height); ++ OUT_RELOCh(chan, bo, offset, flags); ++ OUT_RELOCl(chan, bo, offset, flags); ++ } ++ ++#if 0 ++ if (dst) { ++ BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, width); ++ OUT_RING (chan, height); ++ } ++#endif ++ return 0; ++} ++ ++static int ++nvc0_2d_texture_do_copy(struct nouveau_channel *chan, ++ struct nvc0_miptree *dst, unsigned dst_level, ++ unsigned dx, unsigned dy, unsigned dz, ++ struct nvc0_miptree *src, unsigned src_level, ++ unsigned sx, unsigned sy, unsigned sz, ++ unsigned w, unsigned h) ++{ ++ int ret; ++ ++ ret = MARK_RING(chan, 2 * 16 + 32, 4); ++ if (ret) ++ return ret; ++ ++ ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz); ++ if (ret) ++ 
return ret; ++ ++ ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz); ++ if (ret) ++ return ret; ++ ++ /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */ ++ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4); ++ OUT_RING (chan, dx); ++ OUT_RING (chan, dy); ++ OUT_RING (chan, w); ++ OUT_RING (chan, h); ++ BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, sx); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, sy); ++ ++ return 0; ++} ++ ++static void ++nvc0_resource_copy_region(struct pipe_context *pipe, ++ struct pipe_resource *dst, unsigned dst_level, ++ unsigned dstx, unsigned dsty, unsigned dstz, ++ struct pipe_resource *src, unsigned src_level, ++ const struct pipe_box *src_box) ++{ ++ struct nvc0_screen *screen = nvc0_context(pipe)->screen; ++ int ret; ++ unsigned dst_layer = dstz, src_layer = src_box->z; ++ ++ assert((src->format == dst->format) || ++ (nvc0_2d_format_faithful(src->format) && ++ nvc0_2d_format_faithful(dst->format))); ++ ++ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { ++ ret = nvc0_2d_texture_do_copy(screen->base.channel, ++ nvc0_miptree(dst), dst_level, ++ dstx, dsty, dst_layer, ++ nvc0_miptree(src), src_level, ++ src_box->x, src_box->y, src_layer, ++ src_box->width, src_box->height); ++ if (ret) ++ return; ++ } ++} ++ ++static void ++nvc0_clear_render_target(struct pipe_context *pipe, ++ struct pipe_surface *dst, ++ const float *rgba, ++ unsigned dstx, unsigned dsty, ++ unsigned width, unsigned height) ++{ ++ struct nvc0_context *nv50 = nvc0_context(pipe); ++ struct nvc0_screen *screen = nv50->screen; ++ struct nouveau_channel *chan = screen->base.channel; ++ struct nvc0_miptree *mt = nvc0_miptree(dst->texture); ++ struct nvc0_surface *sf = nvc0_surface(dst); ++ struct nouveau_bo *bo 
= mt->base.bo; ++ ++ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); ++ OUT_RINGf (chan, rgba[0]); ++ OUT_RINGf (chan, rgba[1]); ++ OUT_RINGf (chan, rgba[2]); ++ OUT_RINGf (chan, rgba[3]); ++ ++ if (MARK_RING(chan, 18, 2)) ++ return; ++ ++ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8); ++ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ OUT_RING (chan, sf->width); ++ OUT_RING (chan, sf->height); ++ OUT_RING (chan, nvc0_format_table[dst->format].rt); ++ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, 0); ++ ++ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ ++ ++ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); ++ OUT_RING (chan, (width << 16) | dstx); ++ OUT_RING (chan, (height << 16) | dsty); ++ ++ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); ++ OUT_RING (chan, 0x3c); ++ ++ nv50->dirty |= NVC0_NEW_FRAMEBUFFER; ++} ++ ++static void ++nvc0_clear_depth_stencil(struct pipe_context *pipe, ++ struct pipe_surface *dst, ++ unsigned clear_flags, ++ double depth, ++ unsigned stencil, ++ unsigned dstx, unsigned dsty, ++ unsigned width, unsigned height) ++{ ++ struct nvc0_context *nv50 = nvc0_context(pipe); ++ struct nvc0_screen *screen = nv50->screen; ++ struct nouveau_channel *chan = screen->base.channel; ++ struct nvc0_miptree *mt = nvc0_miptree(dst->texture); ++ struct nvc0_surface *sf = nvc0_surface(dst); ++ struct nouveau_bo *bo = mt->base.bo; ++ uint32_t mode = 0; ++ ++ if (clear_flags & PIPE_CLEAR_DEPTH) { ++ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); ++ OUT_RINGf (chan, depth); ++ mode |= NVC0_3D_CLEAR_BUFFERS_Z; ++ } ++ ++ if (clear_flags & PIPE_CLEAR_STENCIL) { ++ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); ++ OUT_RING (chan, stencil & 0xff); ++ mode |= NVC0_3D_CLEAR_BUFFERS_S; ++ } ++ ++ if (MARK_RING(chan, 17, 2)) ++ return; ++ ++ 
BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); ++ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ OUT_RING (chan, nvc0_format_table[dst->format].rt); ++ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); ++ OUT_RING (chan, 0); ++ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); ++ OUT_RING (chan, sf->width); ++ OUT_RING (chan, sf->height); ++ OUT_RING (chan, (1 << 16) | 1); ++ ++ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); ++ OUT_RING (chan, (width << 16) | dstx); ++ OUT_RING (chan, (height << 16) | dsty); ++ ++ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); ++ OUT_RING (chan, mode); ++ ++ nv50->dirty |= NVC0_NEW_FRAMEBUFFER; ++} ++ ++void ++nvc0_clear(struct pipe_context *pipe, unsigned buffers, ++ const float *rgba, double depth, unsigned stencil) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct pipe_framebuffer_state *fb = &nvc0->framebuffer; ++ unsigned i; ++ const unsigned dirty = nvc0->dirty; ++ uint32_t mode = 0; ++ ++ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ ++ nvc0->dirty &= NVC0_NEW_FRAMEBUFFER; ++ if (!nvc0_state_validate(nvc0)) ++ return; ++ ++ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { ++ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); ++ OUT_RINGf (chan, rgba[0]); ++ OUT_RINGf (chan, rgba[1]); ++ OUT_RINGf (chan, rgba[2]); ++ OUT_RINGf (chan, rgba[3]); ++ mode = ++ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | ++ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; ++ } ++ ++ if (buffers & PIPE_CLEAR_DEPTH) { ++ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1); ++ OUT_RING (chan, fui(depth)); ++ mode |= NVC0_3D_CLEAR_BUFFERS_Z; ++ } ++ ++ if (buffers & PIPE_CLEAR_STENCIL) { ++ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1); ++ OUT_RING (chan, stencil & 0xff); ++ mode |= 
NVC0_3D_CLEAR_BUFFERS_S; ++ } ++ ++ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); ++ OUT_RING (chan, mode); ++ ++ for (i = 1; i < fb->nr_cbufs; i++) { ++ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); ++ OUT_RING (chan, (i << 6) | 0x3c); ++ } ++ ++ nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER; ++} ++ ++void ++nvc0_init_surface_functions(struct nvc0_context *nvc0) ++{ ++ nvc0->pipe.resource_copy_region = nvc0_resource_copy_region; ++ nvc0->pipe.clear_render_target = nvc0_clear_render_target; ++ nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil; ++} ++ ++ +diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c +new file mode 100644 +index 0000000..b219f82 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_tex.c +@@ -0,0 +1,277 @@ ++/* ++ * Copyright 2008 Ben Skeggs ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++#include "nv50_texture.xml.h" ++ ++#include "util/u_format.h" ++ ++static INLINE uint32_t ++nv50_tic_swizzle(uint32_t tc, unsigned swz) ++{ ++ switch (swz) { ++ case PIPE_SWIZZLE_RED: ++ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; ++ case PIPE_SWIZZLE_GREEN: ++ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; ++ case PIPE_SWIZZLE_BLUE: ++ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; ++ case PIPE_SWIZZLE_ALPHA: ++ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; ++ case PIPE_SWIZZLE_ONE: ++ return NV50_TIC_MAP_ONE; ++ case PIPE_SWIZZLE_ZERO: ++ default: ++ return NV50_TIC_MAP_ZERO; ++ } ++} ++ ++struct pipe_sampler_view * ++nvc0_create_sampler_view(struct pipe_context *pipe, ++ struct pipe_resource *texture, ++ const struct pipe_sampler_view *templ) ++{ ++ const struct util_format_description *desc; ++ uint32_t *tic; ++ uint32_t swz[4]; ++ uint32_t depth; ++ struct nvc0_tic_entry *view; ++ struct nvc0_miptree *mt = nvc0_miptree(texture); ++ ++ view = MALLOC_STRUCT(nvc0_tic_entry); ++ if (!view) ++ return NULL; ++ ++ view->pipe = *templ; ++ view->pipe.reference.count = 1; ++ view->pipe.texture = NULL; ++ view->pipe.context = pipe; ++ ++ view->id = -1; ++ ++ pipe_resource_reference(&view->pipe.texture, texture); ++ ++ tic = &view->tic[0]; ++ ++ desc = util_format_description(mt->base.base.format); ++ ++ /* TIC[0] */ ++ ++ tic[0] = nvc0_format_table[view->pipe.format].tic; ++ ++ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r); ++ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g); ++ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b); ++ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a); ++ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | ++ (swz[0] << NV50_TIC_0_MAPR__SHIFT) | ++ (swz[1] << NV50_TIC_0_MAPG__SHIFT) | ++ (swz[2] << NV50_TIC_0_MAPB__SHIFT) | ++ (swz[3] << NV50_TIC_0_MAPA__SHIFT); ++ ++ /* tic[1] = 
mt->base.bo->offset; */ ++ tic[2] = /* mt->base.bo->offset >> 32 */ 0; ++ ++ tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000; ++ ++ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ++ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; ++ ++ if (mt->base.base.target != PIPE_TEXTURE_RECT) ++ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; ++ ++ tic[2] |= ++ ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) | ++ ((mt->base.bo->tile_mode & 0xf00) << (25 - 8)); ++ ++ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); ++ ++ switch (mt->base.base.target) { ++ case PIPE_TEXTURE_1D: ++ tic[2] |= NV50_TIC_2_TARGET_1D; ++ break; ++ case PIPE_TEXTURE_2D: ++ tic[2] |= NV50_TIC_2_TARGET_2D; ++ break; ++ case PIPE_TEXTURE_RECT: ++ tic[2] |= NV50_TIC_2_TARGET_RECT; ++ break; ++ case PIPE_TEXTURE_3D: ++ tic[2] |= NV50_TIC_2_TARGET_3D; ++ break; ++ case PIPE_TEXTURE_CUBE: ++ depth /= 6; ++ if (depth > 1) ++ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; ++ else ++ tic[2] |= NV50_TIC_2_TARGET_CUBE; ++ break; ++ case PIPE_TEXTURE_1D_ARRAY: ++ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; ++ break; ++ case PIPE_TEXTURE_2D_ARRAY: ++ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; ++ break; ++ case PIPE_BUFFER: ++ tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18); ++ default: ++ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); ++ return FALSE; ++ } ++ ++ if (mt->base.base.target == PIPE_BUFFER) ++ tic[3] = mt->base.base.width0; ++ else ++ tic[3] = 0x00300000; ++ ++ tic[4] = (1 << 31) | mt->base.base.width0; ++ ++ tic[5] = mt->base.base.height0 & 0xffff; ++ tic[5] |= depth << 16; ++ tic[5] |= mt->base.base.last_level << 28; ++ ++ tic[6] = 0x03000000; ++ ++ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; ++ ++ return &view->pipe; ++} ++ ++static boolean ++nvc0_validate_tic(struct nvc0_context *nvc0, int s) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nouveau_bo *txc = nvc0->screen->txc; ++ unsigned i; ++ boolean 
need_flush = FALSE; ++ ++ for (i = 0; i < nvc0->num_textures[s]; ++i) { ++ struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]); ++ struct nvc0_resource *res; ++ ++ if (!tic) { ++ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); ++ OUT_RING (chan, (i << 1) | 0); ++ continue; ++ } ++ res = &nvc0_miptree(tic->pipe.texture)->base; ++ ++ if (tic->id < 0) { ++ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); ++ ++ MARK_RING (chan, 9 + 8, 4); ++ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); ++ OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); ++ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); ++ OUT_RING (chan, 32); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_MF(EXEC), 1); ++ OUT_RING (chan, 0x100111); ++ BEGIN_RING_NI(chan, RING_MF(DATA), 8); ++ OUT_RING (chan, tic->tic[0]); ++ OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | ++ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]); ++ OUT_RINGp (chan, &tic->tic[3], 5); ++ ++ need_flush = TRUE; ++ } ++ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); ++ ++ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, ++ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); ++ ++ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); ++ OUT_RING (chan, (tic->id << 9) | (i << 1) | 1); ++ } ++ for (; i < nvc0->state.num_textures[s]; ++i) { ++ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1); ++ OUT_RING (chan, (i << 1) | 0); ++ } ++ nvc0->state.num_textures[s] = nvc0->num_textures[s]; ++ ++ return need_flush; ++} ++ ++void nvc0_validate_textures(struct nvc0_context *nvc0) ++{ ++ boolean need_flush; ++ ++ need_flush = nvc0_validate_tic(nvc0, 0); ++ need_flush |= nvc0_validate_tic(nvc0, 4); ++ ++ if (need_flush) { ++ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1); ++ OUT_RING (nvc0->screen->base.channel, 0); ++ } ++} ++ ++static boolean 
++nvc0_validate_tsc(struct nvc0_context *nvc0, int s) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ unsigned i; ++ boolean need_flush = FALSE; ++ ++ for (i = 0; i < nvc0->num_samplers[s]; ++i) { ++ struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]); ++ ++ if (!tsc) { ++ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); ++ OUT_RING (chan, (i << 4) | 0); ++ continue; ++ } ++ if (tsc->id < 0) { ++ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); ++ ++ nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM, ++ 65536 + tsc->id * 32, 32, tsc->tsc); ++ need_flush = TRUE; ++ } ++ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); ++ ++ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); ++ OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1); ++ } ++ for (; i < nvc0->state.num_samplers[s]; ++i) { ++ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1); ++ OUT_RING (chan, (i << 4) | 0); ++ } ++ nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; ++ ++ return need_flush; ++} ++ ++void nvc0_validate_samplers(struct nvc0_context *nvc0) ++{ ++ boolean need_flush; ++ ++ need_flush = nvc0_validate_tsc(nvc0, 0); ++ need_flush |= nvc0_validate_tsc(nvc0, 4); ++ ++ if (need_flush) { ++ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1); ++ OUT_RING (nvc0->screen->base.channel, 0); ++ } ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +new file mode 100644 +index 0000000..950bee2 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +@@ -0,0 +1,2018 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom 
the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include ++ ++#define NOUVEAU_DEBUG 1 ++ ++#include "pipe/p_shader_tokens.h" ++#include "tgsi/tgsi_parse.h" ++#include "tgsi/tgsi_util.h" ++#include "tgsi/tgsi_dump.h" ++#include "util/u_dynarray.h" ++ ++#include "nvc0_pc.h" ++#include "nvc0_program.h" ++ ++/* Arbitrary internal limits. */ ++#define BLD_MAX_TEMPS 64 ++#define BLD_MAX_ADDRS 4 ++#define BLD_MAX_PREDS 4 ++#define BLD_MAX_IMMDS 128 ++#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS ++ ++#define BLD_MAX_COND_NESTING 8 ++#define BLD_MAX_LOOP_NESTING 4 ++#define BLD_MAX_CALL_NESTING 2 ++ ++/* This structure represents a TGSI register. 
*/ ++struct bld_register { ++ struct nv_value *current; ++ /* collect all SSA values assigned to it */ ++ struct util_dynarray vals; ++ /* 1 bit per loop level, indicates if used/defd, reset when loop ends */ ++ uint16_t loop_use; ++ uint16_t loop_def; ++}; ++ ++static INLINE struct nv_value ** ++bld_register_access(struct bld_register *reg, unsigned i) ++{ ++ return util_dynarray_element(®->vals, struct nv_value *, i); ++} ++ ++static INLINE void ++bld_register_add_val(struct bld_register *reg, struct nv_value *val) ++{ ++ util_dynarray_append(®->vals, struct nv_value *, val); ++} ++ ++static INLINE boolean ++bld_register_del_val(struct bld_register *reg, struct nv_value *val) ++{ ++ unsigned i; ++ ++ for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) ++ if (*bld_register_access(reg, i - 1) == val) ++ break; ++ if (!i) ++ return FALSE; ++ ++ if (i != reg->vals.size / sizeof(struct nv_value *)) ++ *bld_register_access(reg, i - 1) = util_dynarray_pop(®->vals, ++ struct nv_value *); ++ else ++ reg->vals.size -= sizeof(struct nv_value *); ++ ++ return TRUE; ++} ++ ++struct bld_context { ++ struct nvc0_translation_info *ti; ++ ++ struct nv_pc *pc; ++ struct nv_basic_block *b; ++ ++ struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; ++ int call_lvl; ++ ++ struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING]; ++ struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; ++ struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; ++ int cond_lvl; ++ struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; ++ struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; ++ int loop_lvl; ++ ++ ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ ++ ++ struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ ++ struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ ++ struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ ++ struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ ++ ++ uint32_t 
outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; ++ int hpos_index; ++ ++ struct nv_value *zero; ++ struct nv_value *frag_coord[4]; ++ ++ /* wipe on new BB */ ++ struct nv_value *saved_sysvals[4]; ++ struct nv_value *saved_addr[4][2]; ++ struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4]; ++ struct nv_value *saved_immd[BLD_MAX_IMMDS]; ++ uint num_immds; ++}; ++ ++static INLINE ubyte ++bld_register_file(struct bld_context *bld, struct bld_register *reg) ++{ ++ if (reg < &bld->avs[0][0]) return NV_FILE_GPR; ++ else ++ if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; ++ else ++ if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; ++ else ++ return NV_FILE_MEM_V; ++} ++ ++static INLINE struct nv_value * ++bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) ++{ ++ regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; ++ return regs[i * 4 + c].current; ++} ++ ++static struct nv_value * ++bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *); ++ ++/* If a variable is defined in a loop without prior use, we don't need ++ * a phi in the loop header to account for backwards flow. ++ * ++ * However, if this variable is then also used outside the loop, we do ++ * need a phi after all. But we must not use this phi's def inside the ++ * loop, so we can eliminate the phi if it is unused later. 
++ */ ++static INLINE void ++bld_store(struct bld_context *bld, ++ struct bld_register *regs, int i, int c, struct nv_value *val) ++{ ++ const uint16_t m = 1 << bld->loop_lvl; ++ struct bld_register *reg = ®s[i * 4 + c]; ++ ++ if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) ++ bld_loop_phi(bld, reg, val); ++ ++ reg->current = val; ++ bld_register_add_val(reg, reg->current); ++ ++ reg->loop_def |= 1 << bld->loop_lvl; ++} ++ ++#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) ++#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) ++#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) ++#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) ++#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) ++#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) ++#define STORE_OUTP(i, c, v) \ ++ do { \ ++ bld_store(bld, &bld->ovs[0][0], i, c, (v)); \ ++ bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ ++ } while (0) ++ ++static INLINE void ++bld_clear_def_use(struct bld_register *regs, int n, int lvl) ++{ ++ int i; ++ const uint16_t mask = ~(1 << lvl); ++ ++ for (i = 0; i < n * 4; ++i) { ++ regs[i].loop_def &= mask; ++ regs[i].loop_use &= mask; ++ } ++} ++ ++static INLINE void ++bld_warn_uninitialized(struct bld_context *bld, int kind, ++ struct bld_register *reg, struct nv_basic_block *b) ++{ ++#ifdef NOUVEAU_DEBUG ++ long i = (reg - &bld->tvs[0][0]) / 4; ++ long c = (reg - &bld->tvs[0][0]) & 3; ++ ++ if (c == 3) ++ c = -1; ++ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", ++ i, (int)('x' + c), kind ? 
/* Resolve the value of @reg that is valid in basic block @b, creating a
 * phi instruction in @b when more than one definition reaches it.
 *
 * Returns:
 *  - NULL, after bld_warn_uninitialized(), if fetch_by_bb() finds no
 *    definition at all;
 *  - the single collected definition if there is exactly one;
 *  - def[0] of a new NV_OP_PHI whose sources are all collected definitions
 *    otherwise.
 *
 * On every non-NULL return bld->pc->current_block has been set to @b.
 */
static struct nv_value *
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
        struct bld_register *reg)
{
   struct nv_basic_block *in;
   /* 16 entries: fetch_by_bb() asserts that the count stays below 16 */
   struct nv_value *vals[16] = { NULL };
   struct nv_value *val;
   struct nv_instruction *phi;
   int i, j, n;

   /* Each round collects the definitions again from scratch, because the
    * previous round may have added a value to the register.
    */
   do {
      i = n = 0;
      fetch_by_bb(reg, vals, &n, b);

      if (!n) {
         bld_warn_uninitialized(bld, 0, reg, b);
         return NULL;
      }

      if (n == 1) {
         /* a single definition that dominates b can be used directly */
         if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
            break;

         bld_warn_uninitialized(bld, 1, reg, b);

         /* back-tracking to insert missing value of other path */
         in = b;
         while (in->in[0]) {
            if (in->num_in == 1) {
               in = in->in[0];
            } else {
               /* prefer an in-block not reachable from the known definition;
                * fall back to in[0] if both are reachable
                */
               if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
                  in = in->in[0];
               else
               if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
                  in = in->in[1];
               else
                  in = in->in[0];
            }
         }
         bld->pc->current_block = in;

         /* should make this a no-op */
         bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
         /* jumps to the loop test with i == 0 and n == 1: collect again */
         continue;
      }

      for (i = 0; i < n; ++i) {
         /* if value dominates b, continue to the redefinitions */
         if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
            continue;

         /* if value dominates any in-block, b should be the dom frontier */
         for (j = 0; j < b->num_in; ++j)
            if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
               break;
         /* otherwise, find the dominance frontier and put the phi there */
         if (j == b->num_in) {
            in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
            val = bld_phi(bld, in, reg);
            bld_register_add_val(reg, val);
            /* leaves i < n, so the outer loop runs another round */
            break;
         }
      }
   } while(i < n);

   /* back-tracking and recursion may have moved the insertion point */
   bld->pc->current_block = b;

   if (n == 1)
      return vals[0];

   phi = new_instruction(bld->pc, NV_OP_PHI);

   /* the phi result takes file and size from the first collected value */
   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
   for (i = 0; i < n; ++i)
      nv_reference(bld->pc, phi, i, vals[i]);

   return phi->def[0];
}
++ * ++ * @def: redefinition from inside loop, or NULL if to be replaced later ++ */ ++static struct nv_value * ++bld_loop_phi(struct bld_context *bld, struct bld_register *reg, ++ struct nv_value *def) ++{ ++ struct nv_instruction *phi; ++ struct nv_basic_block *bb = bld->pc->current_block; ++ struct nv_value *val = NULL; ++ ++ if (bld->loop_lvl > 1) { ++ --bld->loop_lvl; ++ if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) ++ val = bld_loop_phi(bld, reg, NULL); ++ ++bld->loop_lvl; ++ } ++ ++ if (!val) ++ val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */ ++ if (!val) { ++ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; ++ val = bld_undef(bld, bld_register_file(bld, reg)); ++ } ++ ++ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; ++ ++ phi = new_instruction(bld->pc, NV_OP_PHI); ++ ++ bld_def(phi, 0, new_value_like(bld->pc, val)); ++ if (!def) ++ def = phi->def[0]; ++ ++ bld_register_add_val(reg, phi->def[0]); ++ ++ phi->target = (struct nv_basic_block *)reg; /* cheat */ ++ ++ nv_reference(bld->pc, phi, 0, val); ++ nv_reference(bld->pc, phi, 1, def); ++ ++ bld->pc->current_block = bb; ++ ++ return phi->def[0]; ++} ++ ++static INLINE struct nv_value * ++bld_fetch_global(struct bld_context *bld, struct bld_register *reg) ++{ ++ const uint16_t m = 1 << bld->loop_lvl; ++ const uint16_t use = reg->loop_use; ++ ++ reg->loop_use |= m; ++ ++ /* If neither used nor def'd inside the loop, build a phi in foresight, ++ * so we don't have to replace stuff later on, which requires tracking. 
++ */ ++ if (bld->loop_lvl && !((use | reg->loop_def) & m)) ++ return bld_loop_phi(bld, reg, NULL); ++ ++ return bld_phi(bld, bld->pc->current_block, reg); ++} ++ ++static INLINE struct nv_value * ++bld_imm_u32(struct bld_context *bld, uint32_t u) ++{ ++ int i; ++ unsigned n = bld->num_immds; ++ ++ for (i = 0; i < n; ++i) ++ if (bld->saved_immd[i]->reg.imm.u32 == u) ++ return bld->saved_immd[i]; ++ ++ assert(n < BLD_MAX_IMMDS); ++ bld->num_immds++; ++ ++ bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4); ++ bld->saved_immd[n]->reg.imm.u32 = u; ++ return bld->saved_immd[n]; ++} ++ ++static void ++bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, ++ struct nv_value *); ++ ++/* Replace the source of the phi in the loop header by the last assignment, ++ * or eliminate the phi function if there is no assignment inside the loop. ++ * ++ * Redundancy situation 1 - (used) but (not redefined) value: ++ * %3 = phi %0, %3 = %3 is used ++ * %3 = phi %0, %4 = is new definition ++ * ++ * Redundancy situation 2 - (not used) but (redefined) value: ++ * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE ++ */ ++static void ++bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) ++{ ++ struct nv_basic_block *save = bld->pc->current_block; ++ struct nv_instruction *phi, *next; ++ struct nv_value *val; ++ struct bld_register *reg; ++ int i, s, n; ++ ++ for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { ++ next = phi->next; ++ ++ reg = (struct bld_register *)phi->target; ++ phi->target = NULL; ++ ++ for (s = 1, n = 0; n < bb->num_in; ++n) { ++ if (bb->in_kind[n] != CFG_EDGE_BACK) ++ continue; ++ ++ assert(s < 4); ++ bld->pc->current_block = bb->in[n]; ++ val = bld_fetch_global(bld, reg); ++ ++ for (i = 0; i < 4; ++i) ++ if (phi->src[i] && phi->src[i]->value == val) ++ break; ++ if (i == 4) ++ nv_reference(bld->pc, phi, s++, val); ++ } ++ bld->pc->current_block = save; ++ ++ if (phi->src[0]->value == 
phi->def[0] || ++ phi->src[0]->value == phi->src[1]->value) ++ s = 1; ++ else ++ if (phi->src[1]->value == phi->def[0]) ++ s = 0; ++ else ++ continue; ++ ++ if (s >= 0) { ++ /* eliminate the phi */ ++ bld_register_del_val(reg, phi->def[0]); ++ ++ ++bld->pc->pass_seq; ++ bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); ++ ++ nvc0_insn_delete(phi); ++ } ++ } ++} ++ ++static INLINE struct nv_value * ++bld_imm_f32(struct bld_context *bld, float f) ++{ ++ return bld_imm_u32(bld, fui(f)); ++} ++ ++static struct nv_value * ++bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) ++{ ++ struct nv_instruction *insn = new_instruction(bld->pc, opcode); ++ ++ nv_reference(bld->pc, insn, 0, src0); ++ ++ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); ++} ++ ++static struct nv_value * ++bld_insn_2(struct bld_context *bld, uint opcode, ++ struct nv_value *src0, struct nv_value *src1) ++{ ++ struct nv_instruction *insn = new_instruction(bld->pc, opcode); ++ ++ nv_reference(bld->pc, insn, 0, src0); ++ nv_reference(bld->pc, insn, 1, src1); ++ ++ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); ++} ++ ++static struct nv_value * ++bld_insn_3(struct bld_context *bld, uint opcode, ++ struct nv_value *src0, struct nv_value *src1, ++ struct nv_value *src2) ++{ ++ struct nv_instruction *insn = new_instruction(bld->pc, opcode); ++ ++ nv_reference(bld->pc, insn, 0, src0); ++ nv_reference(bld->pc, insn, 1, src1); ++ nv_reference(bld->pc, insn, 2, src2); ++ ++ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); ++} ++ ++static INLINE void ++bld_src_predicate(struct bld_context *bld, ++ struct nv_instruction *nvi, int s, struct nv_value *val) ++{ ++ nvi->predicate = s; ++ nv_reference(bld->pc, nvi, s, val); ++} ++ ++static INLINE void ++bld_src_pointer(struct bld_context *bld, ++ struct nv_instruction *nvi, int s, struct nv_value *val) ++{ ++ nvi->indirect = s; ++ nv_reference(bld->pc, nvi, 
s, val); ++} ++ ++static void ++bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, ++ struct nv_value *val) ++{ ++ struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST); ++ struct nv_value *loc; ++ ++ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); ++ ++ loc->reg.id = ofst * 4; ++ ++ nv_reference(bld->pc, insn, 0, loc); ++ nv_reference(bld->pc, insn, 1, ptr); ++ nv_reference(bld->pc, insn, 2, val); ++} ++ ++static struct nv_value * ++bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) ++{ ++ struct nv_value *loc, *val; ++ ++ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); ++ ++ loc->reg.address = ofst * 4; ++ ++ val = bld_insn_2(bld, NV_OP_LD, loc, ptr); ++ ++ return val; ++} ++ ++static struct nv_value * ++bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) ++{ ++ struct nv_value *val; ++ ++ val = bld_insn_1(bld, NV_OP_LG2, x); ++ val = bld_insn_2(bld, NV_OP_MUL_F32, e, val); ++ ++ val = bld_insn_1(bld, NV_OP_PREEX2, val); ++ val = bld_insn_1(bld, NV_OP_EX2, val); ++ ++ return val; ++} ++ ++static INLINE struct nv_value * ++bld_load_imm_f32(struct bld_context *bld, float f) ++{ ++ if (f == 0.0f) ++ return bld->zero; ++ return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); ++} ++ ++static INLINE struct nv_value * ++bld_load_imm_u32(struct bld_context *bld, uint32_t u) ++{ ++ if (u == 0) ++ return bld->zero; ++ return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); ++} ++ ++static INLINE struct nv_value * ++bld_setp(struct bld_context *bld, uint op, uint8_t cc, ++ struct nv_value *src0, struct nv_value *src1) ++{ ++ struct nv_value *val = bld_insn_2(bld, op, src0, src1); ++ ++ val->reg.file = NV_FILE_PRED; ++ val->reg.size = 1; ++ val->insn->set_cond = cc & 0xf; ++ return val; ++} ++ ++static INLINE struct nv_value * ++bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src) ++{ ++ struct nv_value *val = bld_insn_1(bld, 
NV_OP_CVT, src); ++ val->insn->ext.cvt.d = dt; ++ val->insn->ext.cvt.s = st; ++ return val; ++} ++ ++static void ++bld_kil(struct bld_context *bld, struct nv_value *src) ++{ ++ struct nv_instruction *nvi; ++ ++ src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero); ++ ++ nvi = new_instruction(bld->pc, NV_OP_KIL); ++ nvi->fixed = 1; ++ ++ bld_src_predicate(bld, nvi, 0, src); ++} ++ ++static void ++bld_flow(struct bld_context *bld, uint opcode, ++ struct nv_value *src, struct nv_basic_block *target, ++ boolean reconverge) ++{ ++ struct nv_instruction *nvi; ++ ++ if (reconverge) ++ new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; ++ ++ nvi = new_instruction(bld->pc, opcode); ++ nvi->target = target; ++ nvi->terminator = 1; ++ if (src) ++ bld_src_predicate(bld, nvi, 0, src); ++} ++ ++static ubyte ++translate_setcc(unsigned opcode) ++{ ++ switch (opcode) { ++ case TGSI_OPCODE_SLT: return NV_CC_LT; ++ case TGSI_OPCODE_SGE: return NV_CC_GE; ++ case TGSI_OPCODE_SEQ: return NV_CC_EQ; ++ case TGSI_OPCODE_SGT: return NV_CC_GT; ++ case TGSI_OPCODE_SLE: return NV_CC_LE; ++ case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; ++ case TGSI_OPCODE_STR: return NV_CC_TR; ++ case TGSI_OPCODE_SFL: return NV_CC_FL; ++ ++ case TGSI_OPCODE_ISLT: return NV_CC_LT; ++ case TGSI_OPCODE_ISGE: return NV_CC_GE; ++ case TGSI_OPCODE_USEQ: return NV_CC_EQ; ++ case TGSI_OPCODE_USGE: return NV_CC_GE; ++ case TGSI_OPCODE_USLT: return NV_CC_LT; ++ case TGSI_OPCODE_USNE: return NV_CC_NE; ++ default: ++ assert(0); ++ return NV_CC_FL; ++ } ++} ++ ++static uint ++translate_opcode(uint opcode) ++{ ++ switch (opcode) { ++ case TGSI_OPCODE_ABS: return NV_OP_ABS_F32; ++ case TGSI_OPCODE_ADD: return NV_OP_ADD_F32; ++ case TGSI_OPCODE_SUB: return NV_OP_SUB_F32; ++ case TGSI_OPCODE_UADD: return NV_OP_ADD_B32; ++ case TGSI_OPCODE_AND: return NV_OP_AND; ++ case TGSI_OPCODE_EX2: return NV_OP_EX2; ++ case TGSI_OPCODE_CEIL: return NV_OP_CEIL; ++ case TGSI_OPCODE_FLR: return NV_OP_FLOOR; ++ case 
TGSI_OPCODE_TRUNC: return NV_OP_TRUNC; ++ case TGSI_OPCODE_COS: return NV_OP_COS; ++ case TGSI_OPCODE_SIN: return NV_OP_SIN; ++ case TGSI_OPCODE_DDX: return NV_OP_DFDX; ++ case TGSI_OPCODE_DDY: return NV_OP_DFDY; ++ case TGSI_OPCODE_F2I: ++ case TGSI_OPCODE_F2U: ++ case TGSI_OPCODE_I2F: ++ case TGSI_OPCODE_U2F: return NV_OP_CVT; ++ case TGSI_OPCODE_INEG: return NV_OP_NEG_S32; ++ case TGSI_OPCODE_LG2: return NV_OP_LG2; ++ case TGSI_OPCODE_ISHR: return NV_OP_SAR; ++ case TGSI_OPCODE_USHR: return NV_OP_SHR; ++ case TGSI_OPCODE_MAD: return NV_OP_MAD_F32; ++ case TGSI_OPCODE_MAX: return NV_OP_MAX_F32; ++ case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32; ++ case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32; ++ case TGSI_OPCODE_MIN: return NV_OP_MIN_F32; ++ case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32; ++ case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32; ++ case TGSI_OPCODE_MUL: return NV_OP_MUL_F32; ++ case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32; ++ case TGSI_OPCODE_OR: return NV_OP_OR; ++ case TGSI_OPCODE_RCP: return NV_OP_RCP; ++ case TGSI_OPCODE_RSQ: return NV_OP_RSQ; ++ case TGSI_OPCODE_SAD: return NV_OP_SAD; ++ case TGSI_OPCODE_SHL: return NV_OP_SHL; ++ case TGSI_OPCODE_SLT: ++ case TGSI_OPCODE_SGE: ++ case TGSI_OPCODE_SEQ: ++ case TGSI_OPCODE_SGT: ++ case TGSI_OPCODE_SLE: ++ case TGSI_OPCODE_SNE: return NV_OP_FSET_F32; ++ case TGSI_OPCODE_ISLT: ++ case TGSI_OPCODE_ISGE: return NV_OP_SET_S32; ++ case TGSI_OPCODE_USEQ: ++ case TGSI_OPCODE_USGE: ++ case TGSI_OPCODE_USLT: ++ case TGSI_OPCODE_USNE: return NV_OP_SET_U32; ++ case TGSI_OPCODE_TEX: return NV_OP_TEX; ++ case TGSI_OPCODE_TXP: return NV_OP_TEX; ++ case TGSI_OPCODE_TXB: return NV_OP_TXB; ++ case TGSI_OPCODE_TXL: return NV_OP_TXL; ++ case TGSI_OPCODE_XOR: return NV_OP_XOR; ++ default: ++ return NV_OP_NOP; ++ } ++} ++ ++#if 0 ++static ubyte ++infer_src_type(unsigned opcode) ++{ ++ switch (opcode) { ++ case TGSI_OPCODE_MOV: ++ case TGSI_OPCODE_AND: ++ case TGSI_OPCODE_OR: ++ case TGSI_OPCODE_XOR: ++ case TGSI_OPCODE_SAD: ++ case 
TGSI_OPCODE_U2F: ++ case TGSI_OPCODE_UADD: ++ case TGSI_OPCODE_UDIV: ++ case TGSI_OPCODE_UMOD: ++ case TGSI_OPCODE_UMAD: ++ case TGSI_OPCODE_UMUL: ++ case TGSI_OPCODE_UMAX: ++ case TGSI_OPCODE_UMIN: ++ case TGSI_OPCODE_USEQ: ++ case TGSI_OPCODE_USGE: ++ case TGSI_OPCODE_USLT: ++ case TGSI_OPCODE_USNE: ++ case TGSI_OPCODE_USHR: ++ return NV_TYPE_U32; ++ case TGSI_OPCODE_I2F: ++ case TGSI_OPCODE_IDIV: ++ case TGSI_OPCODE_IMAX: ++ case TGSI_OPCODE_IMIN: ++ case TGSI_OPCODE_INEG: ++ case TGSI_OPCODE_ISGE: ++ case TGSI_OPCODE_ISHR: ++ case TGSI_OPCODE_ISLT: ++ return NV_TYPE_S32; ++ default: ++ return NV_TYPE_F32; ++ } ++} ++ ++static ubyte ++infer_dst_type(unsigned opcode) ++{ ++ switch (opcode) { ++ case TGSI_OPCODE_MOV: ++ case TGSI_OPCODE_F2U: ++ case TGSI_OPCODE_AND: ++ case TGSI_OPCODE_OR: ++ case TGSI_OPCODE_XOR: ++ case TGSI_OPCODE_SAD: ++ case TGSI_OPCODE_UADD: ++ case TGSI_OPCODE_UDIV: ++ case TGSI_OPCODE_UMOD: ++ case TGSI_OPCODE_UMAD: ++ case TGSI_OPCODE_UMUL: ++ case TGSI_OPCODE_UMAX: ++ case TGSI_OPCODE_UMIN: ++ case TGSI_OPCODE_USEQ: ++ case TGSI_OPCODE_USGE: ++ case TGSI_OPCODE_USLT: ++ case TGSI_OPCODE_USNE: ++ case TGSI_OPCODE_USHR: ++ return NV_TYPE_U32; ++ case TGSI_OPCODE_F2I: ++ case TGSI_OPCODE_IDIV: ++ case TGSI_OPCODE_IMAX: ++ case TGSI_OPCODE_IMIN: ++ case TGSI_OPCODE_INEG: ++ case TGSI_OPCODE_ISGE: ++ case TGSI_OPCODE_ISHR: ++ case TGSI_OPCODE_ISLT: ++ return NV_TYPE_S32; ++ default: ++ return NV_TYPE_F32; ++ } ++} ++#endif ++ ++static void ++emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, ++ unsigned chan, struct nv_value *res) ++{ ++ const struct tgsi_full_dst_register *reg = &inst->Dst[0]; ++ struct nv_instruction *nvi; ++ struct nv_value *mem; ++ struct nv_value *ptr = NULL; ++ int idx; ++ ++ idx = reg->Register.Index; ++ assert(chan < 4); ++ ++ if (reg->Register.Indirect) ++ ptr = FETCH_ADDR(reg->Indirect.Index, ++ tgsi_util_get_src_register_swizzle(®->Indirect, 0)); ++ ++ switch (inst->Instruction.Saturate) 
{ ++ case TGSI_SAT_NONE: ++ break; ++ case TGSI_SAT_ZERO_ONE: ++ res = bld_insn_1(bld, NV_OP_SAT, res); ++ break; ++ case TGSI_SAT_MINUS_PLUS_ONE: ++ res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f)); ++ res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f)); ++ break; ++ } ++ ++ switch (reg->Register.File) { ++ case TGSI_FILE_OUTPUT: ++ if (!res->insn) ++ res = bld_insn_1(bld, NV_OP_MOV, res); ++ ++ if (bld->pc->is_fragprog) { ++ assert(!ptr); ++ STORE_OUTP(idx, chan, res); ++ } else { ++ nvi = new_instruction(bld->pc, NV_OP_EXPORT); ++ mem = new_value(bld->pc, bld->ti->output_file, res->reg.size); ++ nv_reference(bld->pc, nvi, 0, mem); ++ nv_reference(bld->pc, nvi, 1, res); ++ if (!ptr) ++ mem->reg.address = bld->ti->output_loc[idx][chan]; ++ else ++ mem->reg.address = 0x80 + idx * 16 + chan * 4; ++ nvi->fixed = 1; ++ } ++ break; ++ case TGSI_FILE_TEMPORARY: ++ assert(idx < BLD_MAX_TEMPS); ++ if (!res->insn) ++ res = bld_insn_1(bld, NV_OP_MOV, res); ++ ++ assert(res->reg.file == NV_FILE_GPR); ++ assert(res->insn->bb = bld->pc->current_block); ++ ++ if (bld->ti->require_stores) ++ bld_lmem_store(bld, ptr, idx * 4 + chan, res); ++ else ++ STORE_TEMP(idx, chan, res); ++ break; ++ case TGSI_FILE_ADDRESS: ++ assert(idx < BLD_MAX_ADDRS); ++ STORE_ADDR(idx, chan, res); ++ break; ++ } ++} ++ ++static INLINE uint32_t ++bld_is_output_written(struct bld_context *bld, int i, int c) ++{ ++ if (c < 0) ++ return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); ++ return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); ++} ++ ++static void ++bld_append_vp_ucp(struct bld_context *bld) ++{ ++ struct nv_value *res[6]; ++ struct nv_value *ucp, *vtx, *out; ++ struct nv_instruction *insn; ++ int i, c; ++ ++ assert(bld->ti->prog->vp.num_ucps <= 6); ++ ++ for (c = 0; c < 4; ++c) { ++ vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); ++ ++ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { ++ ucp = new_value(bld->pc, 
NV_FILE_MEM_C(15), 4); ++ ucp->reg.address = i * 16 + c * 4; ++ ++ if (c == 0) ++ res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); ++ else ++ res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); ++ } ++ } ++ ++ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { ++ (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; ++ (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; ++ nv_reference(bld->pc, insn, 0, out); ++ nv_reference(bld->pc, insn, 1, res[i]); ++ } ++} ++ ++static void ++bld_export_fp_outputs(struct bld_context *bld) ++{ ++ struct nv_value *vals[4]; ++ struct nv_instruction *nvi; ++ int i, c, n; ++ ++ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { ++ if (!bld_is_output_written(bld, i, -1)) ++ continue; ++ for (n = 0, c = 0; c < 4; ++c) { ++ if (!bld_is_output_written(bld, i, c)) ++ continue; ++ vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); ++ assert(vals[n]); ++ vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); ++ vals[n++]->reg.id = bld->ti->output_loc[i][c]; ++ } ++ assert(n); ++ ++ (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; ++ for (c = 0; c < n; ++c) ++ nv_reference(bld->pc, nvi, c, vals[c]); ++ } ++} ++ ++static void ++bld_new_block(struct bld_context *bld, struct nv_basic_block *b) ++{ ++ int i, c; ++ ++ bld->pc->current_block = b; ++ ++ for (i = 0; i < 4; ++i) ++ bld->saved_addr[i][0] = NULL; ++ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) ++ for (c = 0; c < 4; ++c) ++ bld->saved_inputs[i][c] = NULL; ++ ++ bld->out_kind = CFG_EDGE_FORWARD; ++} ++ ++static struct nv_value * ++bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) ++{ ++ if (bld->saved_inputs[i][c]) ++ return bld->saved_inputs[i][c]; ++ return NULL; ++} ++ ++static struct nv_value * ++bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) ++{ ++ unsigned cent = mode & NVC0_INTERP_CENTROID; ++ ++ mode &= ~NVC0_INTERP_CENTROID; ++ ++ if (val->reg.address == 0x3fc) { ++ /* gl_FrontFacing: 0/~0 to 
-1.0/+1.0 */ ++ val = bld_insn_1(bld, NV_OP_LINTERP, val); ++ val->insn->flat = 1; ++ val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); ++ val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); ++ return val; ++ } else ++ if (mode == NVC0_INTERP_PERSPECTIVE) { ++ val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); ++ } else { ++ val = bld_insn_1(bld, NV_OP_LINTERP, val); ++ } ++ ++ val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0; ++ val->insn->centroid = cent ? 1 : 0; ++ return val; ++} ++ ++static struct nv_value * ++emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, ++ const unsigned s, const unsigned chan) ++{ ++ const struct tgsi_full_src_register *src = &insn->Src[s]; ++ struct nv_value *res = NULL; ++ struct nv_value *ptr = NULL; ++ int idx, ind_idx, dim_idx; ++ unsigned swz, ind_swz, sgn; ++ ++ idx = src->Register.Index; ++ swz = tgsi_util_get_full_src_register_swizzle(src, chan); ++ ++ if (src->Register.Indirect) { ++ ind_idx = src->Indirect.Index; ++ ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); ++ ++ ptr = FETCH_ADDR(ind_idx, ind_swz); ++ } ++ ++ if (src->Register.Dimension) ++ dim_idx = src->Dimension.Index; ++ else ++ dim_idx = 0; ++ ++ switch (src->Register.File) { ++ case TGSI_FILE_CONSTANT: ++ assert(dim_idx < 14); ++ res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4); ++ res->reg.address = idx * 16 + swz * 4; ++ res = bld_insn_1(bld, NV_OP_LD, res); ++ if (ptr) ++ bld_src_pointer(bld, res->insn, 1, ptr); ++ break; ++ case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */ ++ assert(idx < bld->ti->immd32_nr); ++ res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); ++ break; ++ case TGSI_FILE_INPUT: ++ assert(!src->Register.Dimension); ++ if (!ptr) { ++ res = bld_get_saved_input(bld, idx, swz); ++ if (res) ++ return res; ++ } ++ res = new_value(bld->pc, bld->ti->input_file, 4); ++ if (ptr) ++ res->reg.address = 0x80 + idx * 16 + swz * 4; ++ else ++ 
res->reg.address = bld->ti->input_loc[idx][swz]; ++ ++ if (bld->pc->is_fragprog) ++ res = bld_interp(bld, bld->ti->interp_mode[idx], res); ++ else ++ res = bld_insn_1(bld, NV_OP_VFETCH, res); ++ ++ if (ptr) ++ bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr); ++ else ++ bld->saved_inputs[idx][swz] = res; ++ break; ++ case TGSI_FILE_TEMPORARY: ++ if (bld->ti->require_stores) ++ res = bld_lmem_load(bld, ptr, idx * 4 + swz); ++ else ++ res = bld_fetch_global(bld, &bld->tvs[idx][swz]); ++ break; ++ case TGSI_FILE_ADDRESS: ++ res = bld_fetch_global(bld, &bld->avs[idx][swz]); ++ break; ++ case TGSI_FILE_PREDICATE: ++ res = bld_fetch_global(bld, &bld->pvs[idx][swz]); ++ break; ++ case TGSI_FILE_SYSTEM_VALUE: ++ assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */ ++ res = new_value(bld->pc, ++ bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4); ++ res->reg.address = bld->ti->sysval_loc[idx]; ++ ++ if (res->reg.file == NV_FILE_MEM_A) ++ res = bld_insn_1(bld, NV_OP_VFETCH, res); ++ else ++ res = bld_interp(bld, NVC0_INTERP_FLAT, res); ++ ++ /* mesa doesn't do real integers yet :-(and in GL this should be S32) */ ++ res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res); ++ break; ++ default: ++ NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); ++ abort(); ++ break; ++ } ++ if (!res) ++ return bld_undef(bld, NV_FILE_GPR); ++ ++ sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); ++ ++ switch (sgn) { ++ case TGSI_UTIL_SIGN_KEEP: ++ break; ++ case TGSI_UTIL_SIGN_CLEAR: ++ res = bld_insn_1(bld, NV_OP_ABS_F32, res); ++ break; ++ case TGSI_UTIL_SIGN_TOGGLE: ++ res = bld_insn_1(bld, NV_OP_NEG_F32, res); ++ break; ++ case TGSI_UTIL_SIGN_SET: ++ res = bld_insn_1(bld, NV_OP_ABS_F32, res); ++ res = bld_insn_1(bld, NV_OP_NEG_F32, res); ++ break; ++ default: ++ NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); ++ abort(); ++ break; ++ } ++ ++ return res; ++} ++ ++static void ++bld_lit(struct bld_context *bld, struct 
nv_value *dst0[4], ++ const struct tgsi_full_instruction *insn) ++{ ++ struct nv_value *val0 = NULL; ++ unsigned mask = insn->Dst[0].Register.WriteMask; ++ ++ if (mask & ((1 << 0) | (1 << 3))) ++ dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); ++ ++ if (mask & (3 << 1)) { ++ val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero); ++ if (mask & (1 << 1)) ++ dst0[1] = val0; ++ } ++ ++ if (mask & (1 << 2)) { ++ struct nv_value *val1, *val3, *src1, *src3, *pred; ++ struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); ++ struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); ++ ++ src1 = emit_fetch(bld, insn, 0, 1); ++ src3 = emit_fetch(bld, insn, 0, 3); ++ ++ pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero); ++ ++ val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero); ++ val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128); ++ val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128); ++ val3 = bld_pow(bld, val1, val3); ++ ++ dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero); ++ bld_src_predicate(bld, dst0[2]->insn, 1, pred); ++ ++ dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); ++ } ++} ++ ++static INLINE void ++describe_texture_target(unsigned target, int *dim, ++ int *array, int *cube, int *shadow) ++{ ++ *array = *cube = *shadow = 0; ++ ++ switch (target) { ++ case TGSI_TEXTURE_1D: ++ *dim = 1; ++ break; ++ case TGSI_TEXTURE_SHADOW1D: ++ *dim = *shadow = 1; ++ break; ++ case TGSI_TEXTURE_UNKNOWN: ++ case TGSI_TEXTURE_2D: ++ case TGSI_TEXTURE_RECT: ++ *dim = 2; ++ break; ++ case TGSI_TEXTURE_SHADOW2D: ++ case TGSI_TEXTURE_SHADOWRECT: ++ *dim = 2; ++ *shadow = 1; ++ break; ++ case TGSI_TEXTURE_3D: ++ *dim = 3; ++ break; ++ case TGSI_TEXTURE_CUBE: ++ *dim = 2; ++ *cube = 1; ++ break; ++ /* ++ case TGSI_TEXTURE_CUBE_ARRAY: ++ *dim = 2; ++ *cube = *array = 1; ++ break; ++ case TGSI_TEXTURE_1D_ARRAY: ++ *dim = *array = 1; ++ break; ++ case TGSI_TEXTURE_2D_ARRAY: ++ *dim = 2; ++ *array = 1; ++ break; ++ case 
TGSI_TEXTURE_SHADOW1D_ARRAY: ++ *dim = *array = *shadow = 1; ++ break; ++ case TGSI_TEXTURE_SHADOW2D_ARRAY: ++ *dim = 2; ++ *array = *shadow = 1; ++ break; ++ case TGSI_TEXTURE_CUBE_ARRAY: ++ *dim = 2; ++ *array = *cube = 1; ++ break; ++ */ ++ default: ++ assert(0); ++ break; ++ } ++} ++ ++static struct nv_value * ++bld_clone(struct bld_context *bld, struct nv_instruction *nvi) ++{ ++ struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); ++ struct nv_instruction *next, *prev; ++ int c; ++ ++ next = dupi->next; ++ prev = dupi->prev; ++ ++ *dupi = *nvi; ++ ++ dupi->next = next; ++ dupi->prev = prev; ++ ++ for (c = 0; c < 5 && nvi->def[c]; ++c) ++ bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c])); ++ ++ for (c = 0; c < 6 && nvi->src[c]; ++c) { ++ dupi->src[c] = NULL; ++ nv_reference(bld->pc, dupi, c, nvi->src[c]->value); ++ } ++ ++ return dupi->def[0]; ++} ++ ++/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ ++static void ++load_proj_tex_coords(struct bld_context *bld, ++ struct nv_value *t[4], int dim, int shadow, ++ const struct tgsi_full_instruction *insn) ++{ ++ int c; ++ unsigned mask = (1 << dim) - 1; ++ ++ if (shadow) ++ mask |= 4; /* depth comparison value */ ++ ++ t[3] = emit_fetch(bld, insn, 0, 3); ++ if (t[3]->insn->opcode == NV_OP_PINTERP) { ++ t[3] = bld_clone(bld, t[3]->insn); ++ t[3]->insn->opcode = NV_OP_LINTERP; ++ nv_reference(bld->pc, t[3]->insn, 1, NULL); ++ } ++ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); ++ ++ for (c = 0; c < 4; ++c) { ++ if (!(mask & (1 << c))) ++ continue; ++ t[c] = emit_fetch(bld, insn, 0, c); ++ ++ if (t[c]->insn->opcode != NV_OP_PINTERP) ++ continue; ++ mask &= ~(1 << c); ++ ++ t[c] = bld_clone(bld, t[c]->insn); ++ nv_reference(bld->pc, t[c]->insn, 1, t[3]); ++ } ++ if (mask == 0) ++ return; ++ ++ t[3] = emit_fetch(bld, insn, 0, 3); ++ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); ++ ++ for (c = 0; c < 4; ++c) ++ if (mask & (1 << c)) ++ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], 
t[3]); ++} ++ ++/* For a quad of threads / top left, top right, bottom left, bottom right ++ * pixels, do a different operation, and take src0 from a specific thread. ++ */ ++#define QOP_ADD 0 ++#define QOP_SUBR 1 ++#define QOP_SUB 2 ++#define QOP_MOV1 3 ++ ++#define QOP(a, b, c, d) \ ++ ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6)) ++ ++static INLINE struct nv_value * ++bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, ++ struct nv_value *src1, boolean wp) ++{ ++ struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1); ++ val->insn->lanes = lane; ++ val->insn->quadop = qop; ++ if (wp) { ++ assert(!"quadop predicate write"); ++ } ++ return val; ++} ++ ++/* order of TGSI operands: x y z layer shadow lod/bias */ ++/* order of native operands: layer x y z | lod/bias shadow */ ++static struct nv_instruction * ++emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, ++ struct nv_value *dst[4], struct nv_value *arg[4], ++ int dim, int array, int cube, int shadow) ++{ ++ struct nv_value *src[4]; ++ struct nv_instruction *nvi, *bnd; ++ int c; ++ int s = 0; ++ boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; ++ ++ if (array) ++ arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); ++ ++ /* ensure that all inputs reside in a GPR */ ++ for (c = 0; c < dim + array + cube + shadow; ++c) ++ (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; ++ ++ /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ ++ ++ bnd = new_instruction(bld->pc, NV_OP_BIND); ++ if (array) { ++ src[s] = new_value(bld->pc, NV_FILE_GPR, 4); ++ bld_def(bnd, s, src[s]); ++ nv_reference(bld->pc, bnd, s++, arg[dim + cube]); ++ } ++ for (c = 0; c < dim + cube; ++c, ++s) { ++ src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4)); ++ nv_reference(bld->pc, bnd, s, arg[c]); ++ } ++ ++ if (shadow || lodbias) { ++ bnd = new_instruction(bld->pc, NV_OP_BIND); ++ ++ if (lodbias) { ++ src[s] = 
new_value(bld->pc, NV_FILE_GPR, 4); ++ bld_def(bnd, 0, src[s++]); ++ nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]); ++ } ++ if (shadow) { ++ src[s] = new_value(bld->pc, NV_FILE_GPR, 4); ++ bld_def(bnd, lodbias, src[s++]); ++ nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]); ++ } ++ } ++ ++ nvi = new_instruction(bld->pc, opcode); ++ for (c = 0; c < 4; ++c) ++ dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); ++ for (c = 0; c < s; ++c) ++ nv_reference(bld->pc, nvi, c, src[c]); ++ ++ nvi->ext.tex.t = tic; ++ nvi->ext.tex.s = tsc; ++ nvi->tex_mask = 0xf; ++ nvi->tex_cube = cube; ++ nvi->tex_dim = dim; ++ nvi->tex_cube = cube; ++ nvi->tex_shadow = shadow; ++ nvi->tex_live = 0; ++ ++ return nvi; ++} ++ ++/* ++static boolean ++bld_is_constant(struct nv_value *val) ++{ ++ if (val->reg.file == NV_FILE_IMM) ++ return TRUE; ++ return val->insn && nvCG_find_constant(val->insn->src[0]); ++} ++*/ ++ ++static void ++bld_tex(struct bld_context *bld, struct nv_value *dst0[4], ++ const struct tgsi_full_instruction *insn) ++{ ++ struct nv_value *t[4], *s[3]; ++ uint opcode = translate_opcode(insn->Instruction.Opcode); ++ int c, dim, array, cube, shadow; ++ const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; ++ const int tic = insn->Src[1].Register.Index; ++ const int tsc = tic; ++ ++ describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow); ++ ++ assert(dim + array + shadow + lodbias <= 5); ++ ++ if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) ++ load_proj_tex_coords(bld, t, dim, shadow, insn); ++ else { ++ for (c = 0; c < dim + cube + array; ++c) ++ t[c] = emit_fetch(bld, insn, 0, c); ++ if (shadow) ++ t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); ++ } ++ ++ if (cube) { ++ for (c = 0; c < 3; ++c) ++ s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); ++ ++ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]); ++ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]); ++ s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]); ++ ++ 
for (c = 0; c < 3; ++c) ++ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); ++ } ++ ++ if (lodbias) ++ t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3); ++ ++ emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow); ++} ++ ++static INLINE struct nv_value * ++bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn, ++ int n) ++{ ++ struct nv_value *dotp, *src0, *src1; ++ int c; ++ ++ src0 = emit_fetch(bld, insn, 0, 0); ++ src1 = emit_fetch(bld, insn, 1, 0); ++ dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); ++ ++ for (c = 1; c < n; ++c) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp); ++ } ++ return dotp; ++} ++ ++#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ ++ for (chan = 0; chan < 4; ++chan) \ ++ if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) ++ ++static void ++bld_instruction(struct bld_context *bld, ++ const struct tgsi_full_instruction *insn) ++{ ++ struct nv_value *src0; ++ struct nv_value *src1; ++ struct nv_value *src2; ++ struct nv_value *dst0[4] = { NULL }; ++ struct nv_value *temp; ++ int c; ++ uint opcode = translate_opcode(insn->Instruction.Opcode); ++ uint8_t mask = insn->Dst[0].Register.WriteMask; ++ ++#ifdef NOUVEAU_DEBUG ++ debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); ++#endif ++ ++ switch (insn->Instruction.Opcode) { ++ case TGSI_OPCODE_ADD: ++ case TGSI_OPCODE_MAX: ++ case TGSI_OPCODE_MIN: ++ case TGSI_OPCODE_MUL: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ dst0[c] = bld_insn_2(bld, opcode, src0, src1); ++ } ++ break; ++ case TGSI_OPCODE_ARL: ++ src1 = bld_imm_u32(bld, 4); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src0 = bld_insn_1(bld, NV_OP_FLOOR, src0); ++ src0->insn->ext.cvt.d = NV_TYPE_S32; ++ src0->insn->ext.cvt.s = NV_TYPE_F32; ++ dst0[c] = 
bld_insn_2(bld, NV_OP_SHL, src0, src1); ++ } ++ break; ++ case TGSI_OPCODE_CMP: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); ++ src1 = emit_fetch(bld, insn, 1, c); ++ src2 = emit_fetch(bld, insn, 2, c); ++ dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); ++ } ++ break; ++ case TGSI_OPCODE_COS: ++ case TGSI_OPCODE_SIN: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, NV_OP_PRESIN, src0); ++ if (insn->Dst[0].Register.WriteMask & 7) ++ temp = bld_insn_1(bld, opcode, temp); ++ for (c = 0; c < 3; ++c) ++ if (insn->Dst[0].Register.WriteMask & (1 << c)) ++ dst0[c] = temp; ++ if (!(insn->Dst[0].Register.WriteMask & (1 << 3))) ++ break; ++ src0 = emit_fetch(bld, insn, 0, 3); ++ temp = bld_insn_1(bld, NV_OP_PRESIN, src0); ++ dst0[3] = bld_insn_1(bld, opcode, temp); ++ break; ++ case TGSI_OPCODE_DP2: ++ temp = bld_dot(bld, insn, 2); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_DP3: ++ temp = bld_dot(bld, insn, 3); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_DP4: ++ temp = bld_dot(bld, insn, 4); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_DPH: ++ src0 = bld_dot(bld, insn, 3); ++ src1 = emit_fetch(bld, insn, 1, 3); ++ temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_DST: ++ if (insn->Dst[0].Register.WriteMask & 1) ++ dst0[0] = bld_imm_f32(bld, 1.0f); ++ if (insn->Dst[0].Register.WriteMask & 2) { ++ src0 = emit_fetch(bld, insn, 0, 1); ++ src1 = emit_fetch(bld, insn, 1, 1); ++ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); ++ } ++ if (insn->Dst[0].Register.WriteMask & 4) ++ dst0[2] = emit_fetch(bld, insn, 0, 2); ++ if (insn->Dst[0].Register.WriteMask & 8) ++ dst0[3] = emit_fetch(bld, insn, 1, 3); ++ break; ++ case 
TGSI_OPCODE_EXP: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, NV_OP_FLOOR, src0); ++ ++ if (insn->Dst[0].Register.WriteMask & 2) ++ dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp); ++ if (insn->Dst[0].Register.WriteMask & 1) { ++ temp = bld_insn_1(bld, NV_OP_PREEX2, temp); ++ dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); ++ } ++ if (insn->Dst[0].Register.WriteMask & 4) { ++ temp = bld_insn_1(bld, NV_OP_PREEX2, src0); ++ dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); ++ } ++ if (insn->Dst[0].Register.WriteMask & 8) ++ dst0[3] = bld_imm_f32(bld, 1.0f); ++ break; ++ case TGSI_OPCODE_EX2: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, NV_OP_PREEX2, src0); ++ temp = bld_insn_1(bld, NV_OP_EX2, temp); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_FRC: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); ++ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]); ++ } ++ break; ++ case TGSI_OPCODE_KIL: ++ for (c = 0; c < 4; ++c) ++ bld_kil(bld, emit_fetch(bld, insn, 0, c)); ++ break; ++ case TGSI_OPCODE_KILP: ++ (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; ++ break; ++ case TGSI_OPCODE_IF: ++ { ++ struct nv_basic_block *b = new_basic_block(bld->pc); ++ ++ assert(bld->cond_lvl < BLD_MAX_COND_NESTING); ++ ++ nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD); ++ ++ bld->join_bb[bld->cond_lvl] = bld->pc->current_block; ++ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; ++ ++ src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, ++ emit_fetch(bld, insn, 0, 0), bld->zero); ++ ++ bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); ++ ++ ++bld->cond_lvl; ++ bld_new_block(bld, b); ++ } ++ break; ++ case TGSI_OPCODE_ELSE: ++ { ++ struct nv_basic_block *b = new_basic_block(bld->pc); ++ ++ --bld->cond_lvl; ++ nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); ++ ++ 
bld->cond_bb[bld->cond_lvl]->exit->target = b; ++ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; ++ ++ new_instruction(bld->pc, NV_OP_BRA)->terminator = 1; ++ ++ ++bld->cond_lvl; ++ bld_new_block(bld, b); ++ } ++ break; ++ case TGSI_OPCODE_ENDIF: ++ { ++ struct nv_basic_block *b = new_basic_block(bld->pc); ++ ++ --bld->cond_lvl; ++ nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); ++ nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); ++ ++ bld->cond_bb[bld->cond_lvl]->exit->target = b; ++ ++ bld_new_block(bld, b); ++ ++ if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { ++ bld->join_bb[bld->cond_lvl]->exit->prev->target = b; ++ new_instruction(bld->pc, NV_OP_JOIN)->join = 1; ++ } ++ } ++ break; ++ case TGSI_OPCODE_BGNLOOP: ++ { ++ struct nv_basic_block *bl = new_basic_block(bld->pc); ++ struct nv_basic_block *bb = new_basic_block(bld->pc); ++ ++ assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); ++ ++ bld->loop_bb[bld->loop_lvl] = bl; ++ bld->brkt_bb[bld->loop_lvl] = bb; ++ ++ nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); ++ ++ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); ++ ++ if (bld->loop_lvl == bld->pc->loop_nesting_bound) ++ bld->pc->loop_nesting_bound++; ++ ++ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); ++ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); ++ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); ++ } ++ break; ++ case TGSI_OPCODE_BRK: ++ { ++ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; ++ ++ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); ++ ++ if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ ++ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); ++ ++ bld->out_kind = CFG_EDGE_FAKE; ++ } ++ break; ++ case TGSI_OPCODE_CONT: ++ { ++ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; ++ ++ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); ++ ++ 
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); ++ ++ if ((bb = bld->join_bb[bld->cond_lvl - 1])) { ++ bld->join_bb[bld->cond_lvl - 1] = NULL; ++ nvc0_insn_delete(bb->exit->prev); ++ } ++ bld->out_kind = CFG_EDGE_FAKE; ++ } ++ break; ++ case TGSI_OPCODE_ENDLOOP: ++ { ++ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; ++ ++ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); ++ ++ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); ++ ++ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ ++ ++ bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); ++ } ++ break; ++ case TGSI_OPCODE_ABS: ++ case TGSI_OPCODE_CEIL: ++ case TGSI_OPCODE_FLR: ++ case TGSI_OPCODE_TRUNC: ++ case TGSI_OPCODE_DDX: ++ case TGSI_OPCODE_DDY: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ dst0[c] = bld_insn_1(bld, opcode, src0); ++ } ++ break; ++ case TGSI_OPCODE_LIT: ++ bld_lit(bld, dst0, insn); ++ break; ++ case TGSI_OPCODE_LRP: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ src2 = emit_fetch(bld, insn, 2, c); ++ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); ++ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2); ++ } ++ break; ++ case TGSI_OPCODE_MOV: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = emit_fetch(bld, insn, 0, c); ++ break; ++ case TGSI_OPCODE_MAD: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ src2 = emit_fetch(bld, insn, 2, c); ++ dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); ++ } ++ break; ++ case TGSI_OPCODE_POW: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ src1 = emit_fetch(bld, insn, 1, 0); ++ temp = bld_pow(bld, src0, src1); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_LOG: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0); ++ temp 
= bld_insn_1(bld, NV_OP_LG2, src0); ++ dst0[2] = temp; ++ if (insn->Dst[0].Register.WriteMask & 3) { ++ temp = bld_insn_1(bld, NV_OP_FLOOR, temp); ++ dst0[0] = temp; ++ } ++ if (insn->Dst[0].Register.WriteMask & 2) { ++ temp = bld_insn_1(bld, NV_OP_PREEX2, temp); ++ temp = bld_insn_1(bld, NV_OP_EX2, temp); ++ temp = bld_insn_1(bld, NV_OP_RCP, temp); ++ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp); ++ } ++ if (insn->Dst[0].Register.WriteMask & 8) ++ dst0[3] = bld_imm_f32(bld, 1.0f); ++ break; ++ case TGSI_OPCODE_RCP: ++ case TGSI_OPCODE_LG2: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, opcode, src0); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_RSQ: ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, NV_OP_ABS_F32, src0); ++ temp = bld_insn_1(bld, NV_OP_RSQ, temp); ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) ++ dst0[c] = temp; ++ break; ++ case TGSI_OPCODE_SLT: ++ case TGSI_OPCODE_SGE: ++ case TGSI_OPCODE_SEQ: ++ case TGSI_OPCODE_SGT: ++ case TGSI_OPCODE_SLE: ++ case TGSI_OPCODE_SNE: ++ case TGSI_OPCODE_ISLT: ++ case TGSI_OPCODE_ISGE: ++ case TGSI_OPCODE_USEQ: ++ case TGSI_OPCODE_USGE: ++ case TGSI_OPCODE_USLT: ++ case TGSI_OPCODE_USNE: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ dst0[c] = bld_insn_2(bld, opcode, src0, src1); ++ dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); ++ } ++ break; ++ case TGSI_OPCODE_SCS: ++ if (insn->Dst[0].Register.WriteMask & 0x3) { ++ src0 = emit_fetch(bld, insn, 0, 0); ++ temp = bld_insn_1(bld, NV_OP_PRESIN, src0); ++ if (insn->Dst[0].Register.WriteMask & 0x1) ++ dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); ++ if (insn->Dst[0].Register.WriteMask & 0x2) ++ dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); ++ } ++ if (insn->Dst[0].Register.WriteMask & 0x4) ++ dst0[2] = bld_imm_f32(bld, 0.0f); ++ if (insn->Dst[0].Register.WriteMask & 0x8) ++ dst0[3] = 
bld_imm_f32(bld, 1.0f); ++ break; ++ case TGSI_OPCODE_SSG: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); ++ temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); ++ temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); ++ dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); ++ bld_src_predicate(bld, dst0[c]->insn, 1, src1); ++ } ++ break; ++ case TGSI_OPCODE_SUB: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ src0 = emit_fetch(bld, insn, 0, c); ++ src1 = emit_fetch(bld, insn, 1, c); ++ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1); ++ } ++ break; ++ case TGSI_OPCODE_TEX: ++ case TGSI_OPCODE_TXB: ++ case TGSI_OPCODE_TXL: ++ case TGSI_OPCODE_TXP: ++ bld_tex(bld, dst0, insn); ++ break; ++ case TGSI_OPCODE_XPD: ++ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { ++ if (c == 3) { ++ dst0[3] = bld_imm_f32(bld, 1.0f); ++ break; ++ } ++ src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); ++ src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); ++ dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); ++ ++ src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); ++ src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); ++ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]); ++ ++ dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; ++ } ++ break; ++ case TGSI_OPCODE_RET: ++ (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; ++ break; ++ case TGSI_OPCODE_END: ++ /* VP outputs are exported in-place as scalars, optimization later */ ++ if (bld->pc->is_fragprog) ++ bld_export_fp_outputs(bld); ++ if (bld->ti->append_ucp) ++ bld_append_vp_ucp(bld); ++ return; ++ default: ++ NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); ++ abort(); ++ return; ++ } ++ ++ if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && ++ !bld->pc->is_fragprog) { ++ struct nv_instruction *mi = NULL; ++ uint size; ++ ++ if (bld->ti->append_ucp) { ++ if 
(bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { ++ bld->hpos_index = insn->Dst[0].Register.Index; ++ for (c = 0; c < 4; ++c) ++ if (mask & (1 << c)) ++ STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); ++ } ++ } ++ ++ for (c = 0; c < 4; ++c) ++ if ((mask & (1 << c)) && ++ ((dst0[c]->reg.file == NV_FILE_IMM) || ++ (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) ++ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); ++ ++ c = 0; ++ if ((mask & 0x3) == 0x3) { ++ mask &= ~0x3; ++ size = 8; ++ mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn; ++ } ++ if ((mask & 0xc) == 0xc) { ++ mask &= ~0xc; ++ if (mi) { ++ size = 16; ++ nv_reference(bld->pc, mi, 2, dst0[2]); ++ nv_reference(bld->pc, mi, 3, dst0[3]); ++ } else { ++ c = 2; ++ size = 8; ++ mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn; ++ } ++ } else ++ if (mi && (mask & 0x4)) { ++ size = 12; ++ mask &= ~0x4; ++ nv_reference(bld->pc, mi, 2, dst0[2]); ++ } ++ ++ if (mi) { ++ struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT); ++ int s; ++ ++ nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4)); ++ nv_reference(bld->pc, ex, 1, mi->def[0]); ++ ++ for (s = 1; s < size / 4; ++s) { ++ bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4)); ++ nv_reference(bld->pc, ex, s + 1, mi->def[s]); ++ } ++ ++ ex->fixed = 1; ++ ex->src[0]->value->reg.size = size; ++ ex->src[0]->value->reg.address = ++ bld->ti->output_loc[insn->Dst[0].Register.Index][c]; ++ } ++ } ++ ++ for (c = 0; c < 4; ++c) ++ if (mask & (1 << c)) ++ emit_store(bld, insn, c, dst0[c]); ++} ++ ++static INLINE void ++bld_free_registers(struct bld_register *base, int n) ++{ ++ int i, c; ++ ++ for (i = 0; i < n; ++i) ++ for (c = 0; c < 4; ++c) ++ util_dynarray_fini(&base[i * 4 + c].vals); ++} ++ ++int ++nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti) ++{ ++ struct bld_context *bld = CALLOC_STRUCT(bld_context); ++ unsigned ip; ++ ++ pc->root[0] = pc->current_block = 
new_basic_block(pc); ++ ++ bld->pc = pc; ++ bld->ti = ti; ++ ++ pc->loop_nesting_bound = 1; ++ ++ bld->zero = new_value(pc, NV_FILE_GPR, 4); ++ bld->zero->reg.id = 63; ++ ++ if (pc->is_fragprog) { ++ struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4); ++ mem->reg.address = 0x7c; ++ ++ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem); ++ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]); ++ } ++ ++ for (ip = 0; ip < ti->num_insns; ++ip) ++ bld_instruction(bld, &ti->insns[ip]); ++ ++ bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS); ++ bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS); ++ bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS); ++ bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); ++ ++ FREE(bld); ++ return 0; ++} ++ ++/* If a variable is assigned in a loop, replace all references to the value ++ * from outside the loop with a phi value. ++ */ ++static void ++bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b, ++ struct nv_value *old_val, ++ struct nv_value *new_val) ++{ ++ struct nv_instruction *nvi; ++ ++ for (nvi = b->phi ? 
b->phi : b->entry; nvi; nvi = nvi->next) { ++ int s; ++ for (s = 0; s < 6 && nvi->src[s]; ++s) ++ if (nvi->src[s]->value == old_val) ++ nv_reference(pc, nvi, s, new_val); ++ } ++ ++ b->pass_seq = pc->pass_seq; ++ ++ if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) ++ bld_replace_value(pc, b->out[0], old_val, new_val); ++ ++ if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) ++ bld_replace_value(pc, b->out[1], old_val, new_val); ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c +new file mode 100644 +index 0000000..286b382 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_transfer.c +@@ -0,0 +1,381 @@ ++ ++#include "util/u_format.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_transfer.h" ++ ++#include "nv50_defs.xml.h" ++ ++struct nvc0_transfer { ++ struct pipe_transfer base; ++ struct nvc0_m2mf_rect rect[2]; ++ uint32_t nblocksx; ++ uint32_t nblocksy; ++}; ++ ++static void ++nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen, ++ const struct nvc0_m2mf_rect *dst, ++ const struct nvc0_m2mf_rect *src, ++ uint32_t nblocksx, uint32_t nblocksy) ++{ ++ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel; ++ const int cpp = dst->cpp; ++ uint32_t src_ofst = src->base; ++ uint32_t dst_ofst = dst->base; ++ uint32_t height = nblocksy; ++ uint32_t sy = src->y; ++ uint32_t dy = dst->y; ++ uint32_t exec = (1 << 20); ++ ++ assert(dst->cpp == src->cpp); ++ ++ if (nouveau_bo_tile_layout(src->bo)) { ++ BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5); ++ OUT_RING (chan, src->tile_mode); ++ OUT_RING (chan, src->width * cpp); ++ OUT_RING (chan, src->height); ++ OUT_RING (chan, src->depth); ++ OUT_RING (chan, src->z); ++ } else { ++ src_ofst += src->y * src->pitch + src->x * cpp; ++ ++ BEGIN_RING(chan, RING_MF(PITCH_IN), 1); ++ OUT_RING (chan, src->width * cpp); ++ ++ exec |= NVC0_M2MF_EXEC_LINEAR_IN; ++ } ++ ++ if (nouveau_bo_tile_layout(dst->bo)) { ++ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); ++ OUT_RING 
(chan, dst->tile_mode); ++ OUT_RING (chan, dst->width * cpp); ++ OUT_RING (chan, dst->height); ++ OUT_RING (chan, dst->depth); ++ OUT_RING (chan, dst->z); ++ } else { ++ dst_ofst += dst->y * dst->pitch + dst->x * cpp; ++ ++ BEGIN_RING(chan, RING_MF(PITCH_OUT), 1); ++ OUT_RING (chan, dst->width * cpp); ++ ++ exec |= NVC0_M2MF_EXEC_LINEAR_OUT; ++ } ++ ++ while (height) { ++ int line_count = height > 2047 ? 2047 : height; ++ ++ MARK_RING (chan, 17, 4); ++ ++ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); ++ OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD); ++ ++ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); ++ OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR); ++ ++ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { ++ BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2); ++ OUT_RING (chan, src->x * cpp); ++ OUT_RING (chan, sy); ++ } else { ++ src_ofst += line_count * src->pitch; ++ } ++ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { ++ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); ++ OUT_RING (chan, dst->x * cpp); ++ OUT_RING (chan, dy); ++ } else { ++ dst_ofst += line_count * dst->pitch; ++ } ++ ++ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); ++ OUT_RING (chan, nblocksx * cpp); ++ OUT_RING (chan, line_count); ++ BEGIN_RING(chan, RING_MF(EXEC), 1); ++ OUT_RING (chan, exec); ++ ++ height -= line_count; ++ sy += line_count; ++ dy += line_count; ++ } ++} ++ ++void ++nvc0_m2mf_push_linear(struct nvc0_context *nvc0, ++ struct nouveau_bo *dst, unsigned domain, int offset, ++ unsigned size, void *data) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ uint32_t *src = (uint32_t *)data; ++ unsigned count = (size + 3) / 4; ++ ++ MARK_RING (chan, 8, 2); ++ ++ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); ++ OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, dst, offset, 
domain | NOUVEAU_BO_WR); ++ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); ++ OUT_RING (chan, size); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_MF(EXEC), 1); ++ OUT_RING (chan, 0x100111); ++ ++ while (count) { ++ unsigned nr = AVAIL_RING(chan); ++ ++ if (nr < 9) { ++ FIRE_RING(chan); ++ nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR); ++ continue; ++ } ++ nr = MIN2(count, nr - 1); ++ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); ++ ++ BEGIN_RING_NI(chan, RING_MF(DATA), nr); ++ OUT_RINGp (chan, src, nr); ++ ++ src += nr; ++ count -= nr; ++ } ++} ++ ++void ++nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, ++ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, ++ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, ++ unsigned size) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ while (size) { ++ unsigned bytes = MIN2(size, 1 << 17); ++ ++ MARK_RING (chan, 11, 4); ++ ++ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); ++ OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); ++ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); ++ OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); ++ OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); ++ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); ++ OUT_RING (chan, bytes); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_MF(EXEC), 1); ++ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | ++ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); ++ ++ srcoff += bytes; ++ dstoff += bytes; ++ size -= bytes; ++ } ++} ++ ++static void ++nvc0_m2mf_push_rect(struct pipe_screen *pscreen, ++ const struct nvc0_m2mf_rect *dst, ++ const void *data, ++ unsigned nblocksx, unsigned nblocksy) ++{ ++ struct nouveau_channel *chan; ++ const uint8_t *src = (const uint8_t *)data; ++ const int cpp = dst->cpp; ++ const int line_len = nblocksx * cpp; ++ int dy = dst->y; ++ ++ assert(nouveau_bo_tile_layout(dst->bo)); ++ ++ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 
5); ++ OUT_RING (chan, dst->tile_mode); ++ OUT_RING (chan, dst->width * cpp); ++ OUT_RING (chan, dst->height); ++ OUT_RING (chan, dst->depth); ++ OUT_RING (chan, dst->z); ++ ++ while (nblocksy) { ++ int line_count, words; ++ int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN); ++ ++ if (size < (12 + words)) { ++ FIRE_RING(chan); ++ continue; ++ } ++ line_count = (size * 4) / line_len; ++ words = (line_count * line_len + 3) / 4; ++ ++ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); ++ OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); ++ ++ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); ++ OUT_RING (chan, dst->x * cpp); ++ OUT_RING (chan, dy); ++ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); ++ OUT_RING (chan, line_len); ++ OUT_RING (chan, line_count); ++ BEGIN_RING(chan, RING_MF(EXEC), 1); ++ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | ++ NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN); ++ ++ BEGIN_RING_NI(chan, RING_MF(DATA), words); ++ OUT_RINGp (chan, src, words); ++ ++ dy += line_count; ++ src += line_len * line_count; ++ nblocksy -= line_count; ++ } ++} ++ ++struct pipe_transfer * ++nvc0_miptree_transfer_new(struct pipe_context *pctx, ++ struct pipe_resource *res, ++ unsigned level, ++ unsigned usage, ++ const struct pipe_box *box) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pctx); ++ struct pipe_screen *pscreen = pctx->screen; ++ struct nouveau_device *dev = nvc0->screen->base.device; ++ struct nvc0_miptree *mt = nvc0_miptree(res); ++ struct nvc0_miptree_level *lvl = &mt->level[level]; ++ struct nvc0_transfer *tx; ++ uint32_t size; ++ uint32_t w, h, d, z, layer; ++ int ret; ++ ++ if (mt->layout_3d) { ++ z = box->z; ++ d = u_minify(res->depth0, level); ++ layer = 0; ++ } else { ++ z = 0; ++ d = 1; ++ layer = box->z; ++ } ++ ++ tx = CALLOC_STRUCT(nvc0_transfer); ++ if (!tx) ++ return NULL; ++ ++ pipe_resource_reference(&tx->base.resource, res); ++ ++ 
tx->base.level = level; ++ tx->base.usage = usage; ++ tx->base.box = *box; ++ ++ tx->nblocksx = util_format_get_nblocksx(res->format, box->width); ++ tx->nblocksy = util_format_get_nblocksy(res->format, box->height); ++ ++ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); ++ tx->base.layer_stride = tx->nblocksy * tx->base.stride; ++ ++ w = u_minify(res->width0, level); ++ h = u_minify(res->height0, level); ++ ++ tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); ++ ++ tx->rect[0].bo = mt->base.bo; ++ tx->rect[0].base = lvl->offset + layer * mt->layer_stride; ++ tx->rect[0].tile_mode = lvl->tile_mode; ++ tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); ++ tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); ++ tx->rect[0].z = z; ++ tx->rect[0].width = util_format_get_nblocksx(res->format, w); ++ tx->rect[0].height = util_format_get_nblocksy(res->format, h); ++ tx->rect[0].depth = d; ++ tx->rect[0].pitch = lvl->pitch; ++ tx->rect[0].domain = NOUVEAU_BO_VRAM; ++ ++ size = tx->base.layer_stride; ++ ++ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, ++ size * tx->base.box.depth, &tx->rect[1].bo); ++ if (ret) { ++ FREE(tx); ++ return NULL; ++ } ++ ++ tx->rect[1].width = tx->nblocksx; ++ tx->rect[1].height = tx->nblocksy; ++ tx->rect[1].depth = 1; ++ tx->rect[1].pitch = tx->base.stride; ++ tx->rect[1].domain = NOUVEAU_BO_GART; ++ ++ if (usage & PIPE_TRANSFER_READ) { ++ unsigned i; ++ for (i = 0; i < box->depth; ++i) { ++ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], ++ tx->nblocksx, tx->nblocksy); ++ if (mt->layout_3d) ++ tx->rect[0].z++; ++ else ++ tx->rect[0].base += mt->layer_stride; ++ tx->rect[1].base += size; ++ } ++ } ++ tx->rect[0].z = z; ++ tx->rect[1].base = 0; ++ ++ return &tx->base; ++} ++ ++void ++nvc0_miptree_transfer_del(struct pipe_context *pctx, ++ struct pipe_transfer *transfer) ++{ ++ struct pipe_screen *pscreen = pctx->screen; ++ struct nvc0_transfer 
*tx = (struct nvc0_transfer *)transfer; ++ struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource); ++ unsigned i; ++ ++ if (tx->base.usage & PIPE_TRANSFER_WRITE) { ++ for (i = 0; i < tx->base.box.depth; ++i) { ++ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], ++ tx->nblocksx, tx->nblocksy); ++ if (mt->layout_3d) ++ tx->rect[0].z++; ++ else ++ tx->rect[0].base += mt->layer_stride; ++ tx->rect[1].base += tx->nblocksy * tx->base.stride; ++ } ++ } ++ ++ nouveau_bo_ref(NULL, &tx->rect[1].bo); ++ pipe_resource_reference(&transfer->resource, NULL); ++ ++ FREE(tx); ++} ++ ++void * ++nvc0_miptree_transfer_map(struct pipe_context *pctx, ++ struct pipe_transfer *transfer) ++{ ++ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; ++ int ret; ++ unsigned flags = 0; ++ ++ if (tx->rect[1].bo->map) ++ return tx->rect[1].bo->map; ++ ++ if (transfer->usage & PIPE_TRANSFER_READ) ++ flags = NOUVEAU_BO_RD; ++ if (transfer->usage & PIPE_TRANSFER_WRITE) ++ flags |= NOUVEAU_BO_WR; ++ ++ ret = nouveau_bo_map(tx->rect[1].bo, flags); ++ if (ret) ++ return NULL; ++ return tx->rect[1].bo->map; ++} ++ ++void ++nvc0_miptree_transfer_unmap(struct pipe_context *pctx, ++ struct pipe_transfer *transfer) ++{ ++ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; ++ ++ nouveau_bo_unmap(tx->rect[1].bo); ++} ++ +diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h +new file mode 100644 +index 0000000..222f72d +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_transfer.h +@@ -0,0 +1,38 @@ ++ ++#ifndef __NVC0_TRANSFER_H__ ++#define __NVC0_TRANSFER_H__ ++ ++#include "pipe/p_state.h" ++ ++struct pipe_transfer * ++nvc0_miptree_transfer_new(struct pipe_context *pcontext, ++ struct pipe_resource *pt, ++ unsigned level, ++ unsigned usage, ++ const struct pipe_box *box); ++void ++nvc0_miptree_transfer_del(struct pipe_context *pcontext, ++ struct pipe_transfer *ptx); ++void * ++nvc0_miptree_transfer_map(struct pipe_context *pcontext, 
++ struct pipe_transfer *ptx); ++void ++nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, ++ struct pipe_transfer *ptx); ++ ++struct nvc0_m2mf_rect { ++ struct nouveau_bo *bo; ++ uint32_t base; ++ unsigned domain; ++ uint32_t pitch; ++ uint32_t width; ++ uint32_t x; ++ uint32_t height; ++ uint32_t y; ++ uint16_t depth; ++ uint16_t z; ++ uint16_t tile_mode; ++ uint16_t cpp; ++}; ++ ++#endif +diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c +new file mode 100644 +index 0000000..a51a887 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_vbo.c +@@ -0,0 +1,671 @@ ++/* ++ * Copyright 2010 Christoph Bumiller ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF ++ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include "pipe/p_context.h" ++#include "pipe/p_state.h" ++#include "util/u_inlines.h" ++#include "util/u_format.h" ++#include "translate/translate.h" ++ ++#include "nvc0_context.h" ++#include "nvc0_resource.h" ++ ++#include "nvc0_3d.xml.h" ++ ++void ++nvc0_vertex_state_delete(struct pipe_context *pipe, ++ void *hwcso) ++{ ++ struct nvc0_vertex_stateobj *so = hwcso; ++ ++ if (so->translate) ++ so->translate->release(so->translate); ++ FREE(hwcso); ++} ++ ++void * ++nvc0_vertex_state_create(struct pipe_context *pipe, ++ unsigned num_elements, ++ const struct pipe_vertex_element *elements) ++{ ++ struct nvc0_vertex_stateobj *so; ++ struct translate_key transkey; ++ unsigned i; ++ ++ assert(num_elements); ++ ++ so = MALLOC(sizeof(*so) + ++ (num_elements - 1) * sizeof(struct nvc0_vertex_element)); ++ if (!so) ++ return NULL; ++ so->num_elements = num_elements; ++ so->instance_elts = 0; ++ so->instance_bufs = 0; ++ ++ transkey.nr_elements = 0; ++ transkey.output_stride = 0; ++ ++ for (i = 0; i < num_elements; ++i) { ++ const struct pipe_vertex_element *ve = &elements[i]; ++ const unsigned vbi = ve->vertex_buffer_index; ++ enum pipe_format fmt = ve->src_format; ++ ++ so->element[i].pipe = elements[i]; ++ so->element[i].state = nvc0_format_table[fmt].vtx; ++ ++ if (!so->element[i].state) { ++ switch (util_format_get_nr_components(fmt)) { ++ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; ++ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; ++ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; ++ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; ++ default: ++ assert(0); ++ return NULL; ++ } ++ so->element[i].state = nvc0_format_table[fmt].vtx; ++ } ++ so->element[i].state |= i; ++ ++ if (1) { ++ unsigned j = transkey.nr_elements++; ++ ++ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; ++ transkey.element[j].input_format = ve->src_format; ++ transkey.element[j].input_buffer = vbi; ++ transkey.element[j].input_offset = ve->src_offset; ++ 
transkey.element[j].instance_divisor = ve->instance_divisor; ++ ++ transkey.element[j].output_format = fmt; ++ transkey.element[j].output_offset = transkey.output_stride; ++ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; ++ ++ if (unlikely(ve->instance_divisor)) { ++ so->instance_elts |= 1 << i; ++ so->instance_bufs |= 1 << vbi; ++ } ++ } ++ } ++ ++ so->translate = translate_create(&transkey); ++ so->vtx_size = transkey.output_stride / 4; ++ so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1); ++ ++ return so; ++} ++ ++#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \ ++ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \ ++ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST ++ ++#define VTX_ATTR(a, c, t, s) \ ++ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \ ++ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \ ++ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \ ++ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) ++ ++static void ++nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb, ++ struct pipe_vertex_element *ve, unsigned attr) ++{ ++ const void *data; ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_resource *res = nvc0_resource(vb->buffer); ++ float v[4]; ++ int i; ++ const unsigned nc = util_format_get_nr_components(ve->src_format); ++ ++ data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset + ++ ve->src_offset, NOUVEAU_BO_RD); ++ ++ util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); ++ ++ BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1); ++ OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32)); ++ for (i = 0; i < nc; ++i) ++ OUT_RINGf(chan, v[i]); ++} ++ ++static INLINE void ++nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, ++ uint32_t *base, uint32_t *size) ++{ ++ if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { ++ /* TODO: use min and max instance divisor to get a proper range */ ++ *base = 0; ++ *size = (nvc0->vtxbuf[vbi].max_index + 1) * 
nvc0->vtxbuf[vbi].stride; ++ } else { ++ assert(nvc0->vbo_max_index != ~0); ++ *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; ++ *size = (nvc0->vbo_max_index - ++ nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride; ++ } ++} ++ ++static void ++nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) ++{ ++ struct pipe_vertex_buffer *vb; ++ struct nvc0_resource *buf; ++ int i; ++ uint32_t base, size; ++ ++ nvc0->vbo_fifo = nvc0->vbo_user = 0; ++ ++ for (i = 0; i < nvc0->num_vtxbufs; ++i) { ++ vb = &nvc0->vtxbuf[i]; ++ if (!vb->stride) ++ continue; ++ buf = nvc0_resource(vb->buffer); ++ ++ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { ++ if (nvc0->vbo_push_hint) { ++ nvc0->vbo_fifo = ~0; ++ continue; ++ } else { ++ if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) { ++ nvc0->vbo_user |= 1 << i; ++ assert(vb->stride > vb->buffer_offset); ++ nvc0_vbuf_range(nvc0, i, &base, &size); ++ nvc0_user_buffer_upload(buf, base, size); ++ } else { ++ nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART); ++ } ++ nvc0->vbo_dirty = TRUE; ++ } ++ } ++ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD); ++ nvc0_buffer_adjust_score(nvc0, buf, 1); ++ } ++} ++ ++static void ++nvc0_update_user_vbufs(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ uint32_t base, offset, size; ++ int i; ++ uint32_t written = 0; ++ ++ for (i = 0; i < nvc0->vertex->num_elements; ++i) { ++ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; ++ const int b = ve->vertex_buffer_index; ++ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; ++ struct nvc0_resource *buf = nvc0_resource(vb->buffer); ++ ++ if (!(nvc0->vbo_user & (1 << b))) ++ continue; ++ ++ if (!vb->stride) { ++ nvc0_emit_vtxattr(nvc0, vb, ve, i); ++ continue; ++ } ++ nvc0_vbuf_range(nvc0, b, &base, &size); ++ ++ if (!(written & (1 << b))) { ++ written |= 1 << b; ++ nvc0_user_buffer_upload(buf, base, size); ++ } ++ offset = vb->buffer_offset + ve->src_offset; ++ ++ 
BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); ++ OUT_RING (chan, i); ++ OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); ++ OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); ++ } ++ nvc0->vbo_dirty = TRUE; ++} ++ ++void ++nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nvc0_vertex_stateobj *vertex = nvc0->vertex; ++ struct pipe_vertex_buffer *vb; ++ struct nvc0_vertex_element *ve; ++ unsigned i; ++ ++ nvc0_prevalidate_vbufs(nvc0); ++ ++ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); ++ for (i = 0; i < vertex->num_elements; ++i) { ++ ve = &vertex->element[i]; ++ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; ++ ++ if (likely(vb->stride) || nvc0->vbo_fifo) { ++ OUT_RING(chan, ve->state); ++ } else { ++ OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); ++ nvc0->vbo_fifo &= ~(1 << i); ++ } ++ } ++ ++ for (i = 0; i < vertex->num_elements; ++i) { ++ struct nvc0_resource *res; ++ unsigned size, offset; ++ ++ ve = &vertex->element[i]; ++ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; ++ ++ if (unlikely(ve->pipe.instance_divisor)) { ++ if (!(nvc0->state.instance_elts & (1 << i))) { ++ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); ++ } ++ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1); ++ OUT_RING (chan, ve->pipe.instance_divisor); ++ } else ++ if (unlikely(nvc0->state.instance_elts & (1 << i))) { ++ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); ++ } ++ ++ res = nvc0_resource(vb->buffer); ++ ++ if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) { ++ if (!nvc0->vbo_fifo) ++ nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i); ++ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); ++ OUT_RING (chan, 0); ++ continue; ++ } ++ ++ size = vb->buffer->width0; ++ offset = ve->pipe.src_offset + vb->buffer_offset; ++ ++ 
BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); ++ OUT_RING (chan, (1 << 12) | vb->stride); ++ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); ++ OUT_RING (chan, i); ++ OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD); ++ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); ++ } ++ for (; i < nvc0->state.num_vtxelts; ++i) { ++ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); ++ OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE); ++ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); ++ OUT_RING (chan, 0); ++ } ++ ++ nvc0->state.num_vtxelts = vertex->num_elements; ++ nvc0->state.instance_elts = vertex->instance_elts; ++} ++ ++#define NVC0_PRIM_GL_CASE(n) \ ++ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n ++ ++static INLINE unsigned ++nvc0_prim_gl(unsigned prim) ++{ ++ switch (prim) { ++ NVC0_PRIM_GL_CASE(POINTS); ++ NVC0_PRIM_GL_CASE(LINES); ++ NVC0_PRIM_GL_CASE(LINE_LOOP); ++ NVC0_PRIM_GL_CASE(LINE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLES); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); ++ NVC0_PRIM_GL_CASE(TRIANGLE_FAN); ++ NVC0_PRIM_GL_CASE(QUADS); ++ NVC0_PRIM_GL_CASE(QUAD_STRIP); ++ NVC0_PRIM_GL_CASE(POLYGON); ++ NVC0_PRIM_GL_CASE(LINES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); ++ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); ++ /* ++ NVC0_PRIM_GL_CASE(PATCHES); */ ++ default: ++ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; ++ break; ++ } ++} ++ ++static void ++nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) ++{ ++ struct nvc0_context *nvc0 = chan->user_private; ++ ++ nvc0_bufctx_emit_relocs(nvc0); ++} ++ ++#if 0 ++static struct nouveau_bo * ++nvc0_tfb_setup(struct nvc0_context *nvc0) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ struct nouveau_bo *tfb = NULL; ++ int ret, i; ++ ++ ret = nouveau_bo_new(nvc0->screen->base.device, ++ NOUVEAU_BO_GART | 
NOUVEAU_BO_MAP, 0, 4096, &tfb); ++ if (ret) ++ return NULL; ++ ++ ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); ++ if (ret) ++ return NULL; ++ memset(tfb->map, 0xee, 8 * 4 * 3); ++ nouveau_bo_unmap(tfb); ++ ++ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); ++ OUT_RING (chan, 1); ++ OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); ++ OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); ++ OUT_RING (chan, tfb->size); ++ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ ++ BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); ++ OUT_RING (chan, 0); ++ OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */ ++ OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ ++ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); ++ OUT_RING (chan, 0x1f1e1d1c); ++ OUT_RING (chan, 0xa3a2a1a0); ++ for (i = 1; i < 4; ++i) { ++ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); ++ OUT_RING (chan, 0); ++ } ++ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x135c), 1); ++ OUT_RING (chan, 1); ++ BEGIN_RING(chan, RING_3D_(0x135c), 1); ++ OUT_RING (chan, 0); ++ ++ return tfb; ++} ++#endif ++ ++static void ++nvc0_draw_arrays(struct nvc0_context *nvc0, ++ unsigned mode, unsigned start, unsigned count, ++ unsigned instance_count) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ unsigned prim; ++ ++ chan->flush_notify = nvc0_draw_vbo_flush_notify; ++ chan->user_private = nvc0; ++ ++ prim = nvc0_prim_gl(mode); ++ ++ while (instance_count--) { ++ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); ++ OUT_RING (chan, prim); ++ BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2); ++ OUT_RING (chan, start); ++ OUT_RING (chan, count); ++ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); ++ ++ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ } ++ ++ chan->flush_notify = NULL; ++} ++ ++static void ++nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, ++ unsigned start, 
unsigned count) ++{ ++ map += start; ++ ++ if (count & 3) { ++ unsigned i; ++ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3); ++ for (i = 0; i < (count & 3); ++i) ++ OUT_RING(chan, *map++); ++ count &= ~3; ++ } ++ while (count) { ++ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; ++ ++ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr); ++ for (i = 0; i < nr; ++i) { ++ OUT_RING(chan, ++ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); ++ map += 4; ++ } ++ count -= nr * 4; ++ } ++} ++ ++static void ++nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map, ++ unsigned start, unsigned count) ++{ ++ map += start; ++ ++ if (count & 1) { ++ count &= ~1; ++ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); ++ OUT_RING (chan, *map++); ++ } ++ while (count) { ++ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; ++ ++ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); ++ for (i = 0; i < nr; ++i) { ++ OUT_RING(chan, (map[1] << 16) | map[0]); ++ map += 2; ++ } ++ count -= nr * 2; ++ } ++} ++ ++static void ++nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map, ++ unsigned start, unsigned count) ++{ ++ map += start; ++ ++ while (count) { ++ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); ++ ++ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); ++ OUT_RINGp (chan, map, nr); ++ ++ map += nr; ++ count -= nr; ++ } ++} ++ ++static void ++nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, ++ unsigned start, unsigned count) ++{ ++ map += start; ++ ++ if (count & 1) { ++ count--; ++ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); ++ OUT_RING (chan, *map++); ++ } ++ while (count) { ++ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; ++ ++ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); ++ for (i = 0; i < nr; ++i) { ++ OUT_RING(chan, (map[1] << 16) | map[0]); ++ map += 2; ++ } ++ count -= nr * 2; ++ } ++} ++ ++static void ++nvc0_draw_elements(struct 
nvc0_context *nvc0, boolean shorten, ++ unsigned mode, unsigned start, unsigned count, ++ unsigned instance_count, int32_t index_bias) ++{ ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ void *data; ++ unsigned prim; ++ const unsigned index_size = nvc0->idxbuf.index_size; ++ ++ chan->flush_notify = nvc0_draw_vbo_flush_notify; ++ chan->user_private = nvc0; ++ ++ prim = nvc0_prim_gl(mode); ++ ++ if (index_bias != nvc0->state.index_bias) { ++ BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1); ++ OUT_RING (chan, index_bias); ++ nvc0->state.index_bias = index_bias; ++ } ++ ++ if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) { ++ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); ++ unsigned offset = nvc0->idxbuf.offset; ++ unsigned limit = nvc0->idxbuf.buffer->width0 - 1; ++ ++ nvc0_buffer_adjust_score(nvc0, res, 1); ++ ++ while (instance_count--) { ++ MARK_RING (chan, 11, 4); ++ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); ++ OUT_RING (chan, mode); ++ BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7); ++ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); ++ OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD); ++ OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD); ++ OUT_RING (chan, index_size >> 1); ++ OUT_RING (chan, start); ++ OUT_RING (chan, count); ++ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); ++ ++ nvc0_resource_fence(res, NOUVEAU_BO_RD); ++ ++ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ } ++ } else { ++ data = nvc0_resource_map_offset(nvc0, nvc0_resource(nvc0->idxbuf.buffer), ++ nvc0->idxbuf.offset, NOUVEAU_BO_RD); ++ if (!data) ++ return; ++ ++ while (instance_count--) { ++ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); ++ OUT_RING (chan, prim); ++ switch (index_size) { ++ case 1: ++ nvc0_draw_elements_inline_u08(chan, data, start, count); ++ break; ++ case 2: ++ nvc0_draw_elements_inline_u16(chan, data, start, count); ++ break; ++ case 4: ++ if (shorten) ++ 
nvc0_draw_elements_inline_u32_short(chan, data, start, count); ++ else ++ nvc0_draw_elements_inline_u32(chan, data, start, count); ++ break; ++ default: ++ assert(0); ++ return; ++ } ++ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); ++ ++ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ } ++ } ++ ++ chan->flush_notify = NULL; ++} ++ ++void ++nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) ++{ ++ struct nvc0_context *nvc0 = nvc0_context(pipe); ++ struct nouveau_channel *chan = nvc0->screen->base.channel; ++ ++ /* For picking only a few vertices from a large user buffer, push is better, ++ * if index count is larger and we expect repeated vertices, suggest upload. ++ */ ++ nvc0->vbo_push_hint = /* the 64 is heuristic */ ++ !(info->indexed && ++ ((info->max_index - info->min_index + 64) < info->count)); ++ ++ nvc0->vbo_min_index = info->min_index; ++ nvc0->vbo_max_index = info->max_index; ++ ++ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) ++ nvc0_update_user_vbufs(nvc0); ++ ++ nvc0_state_validate(nvc0); ++ ++ if (nvc0->vbo_fifo) { ++ nvc0_push_vbo(nvc0, info); ++ return; ++ } ++ ++ if (nvc0->state.instance_base != info->start_instance) { ++ nvc0->state.instance_base = info->start_instance; ++ /* NOTE: this does not affect the shader input, should it ? 
*/ ++ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1); ++ OUT_RING (chan, info->start_instance); ++ } ++ ++ if (nvc0->vbo_dirty) { ++ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1); ++ OUT_RING (chan, 0); ++ nvc0->vbo_dirty = FALSE; ++ } ++ ++ if (!info->indexed) { ++ nvc0_draw_arrays(nvc0, ++ info->mode, info->start, info->count, ++ info->instance_count); ++ } else { ++ boolean shorten = info->max_index <= 65535; ++ ++ assert(nvc0->idxbuf.buffer); ++ ++ if (info->primitive_restart != nvc0->state.prim_restart) { ++ if (info->primitive_restart) { ++ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); ++ OUT_RING (chan, 1); ++ OUT_RING (chan, info->restart_index); ++ ++ if (info->restart_index > 65535) ++ shorten = FALSE; ++ } else { ++ IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0); ++ } ++ nvc0->state.prim_restart = info->primitive_restart; ++ } else ++ if (info->primitive_restart) { ++ BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); ++ OUT_RING (chan, info->restart_index); ++ ++ if (info->restart_index > 65535) ++ shorten = FALSE; ++ } ++ ++ nvc0_draw_elements(nvc0, shorten, ++ info->mode, info->start, info->count, ++ info->instance_count, info->index_bias); ++ } ++} +diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h +new file mode 100644 +index 0000000..1544fb7 +--- /dev/null ++++ b/src/gallium/drivers/nvc0/nvc0_winsys.h +@@ -0,0 +1,120 @@ ++ ++#ifndef __NVC0_WINSYS_H__ ++#define __NVC0_WINSYS_H__ ++ ++#include ++#include ++#include "pipe/p_defines.h" ++ ++#include "nouveau/nouveau_bo.h" ++#include "nouveau/nouveau_channel.h" ++#include "nouveau/nouveau_grobj.h" ++#include "nouveau/nouveau_device.h" ++#include "nouveau/nouveau_resource.h" ++#include "nouveau/nouveau_pushbuf.h" ++#include "nouveau/nouveau_reloc.h" ++ ++#include "nvc0_resource.h" /* OUT_RESRC */ ++ ++#ifndef NV04_PFIFO_MAX_PACKET_LEN ++#define NV04_PFIFO_MAX_PACKET_LEN 2047 ++#endif ++ ++#define NVC0_SUBCH_3D 1 ++#define NVC0_SUBCH_2D 2 ++#define 
NVC0_SUBCH_MF 3 ++ ++#define NVC0_MF_(n) NVC0_M2MF_##n ++ ++#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2)) ++#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2)) ++#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2)) ++ ++#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2)) ++#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2)) ++#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2)) ++ ++#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2)) ++ ++int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); ++ ++static inline uint32_t ++nouveau_bo_tile_layout(struct nouveau_bo *bo) ++{ ++ return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK; ++} ++ ++static INLINE void ++nouveau_bo_validate(struct nouveau_channel *chan, ++ struct nouveau_bo *bo, unsigned flags) ++{ ++ nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0); ++} ++ ++/* incremental methods */ ++static INLINE void ++BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size) ++{ ++ WAIT_RING(chan, size + 1); ++ OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd); ++} ++ ++/* non-incremental */ ++static INLINE void ++BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size) ++{ ++ WAIT_RING(chan, size + 1); ++ OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd); ++} ++ ++/* increment-once */ ++static INLINE void ++BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size) ++{ ++ WAIT_RING(chan, size + 1); ++ OUT_RING (chan, (0xa << 28) | (size << 16) | mthd); ++} ++ ++/* inline-data */ ++static INLINE void ++IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data) ++{ ++ WAIT_RING(chan, 1); ++ OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd); ++} ++ ++static INLINE int ++OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res, ++ unsigned delta, unsigned flags) ++{ ++ return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); ++} ++ ++static INLINE int 
++OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, ++ unsigned delta, unsigned flags) ++{ ++ if (flags & NOUVEAU_BO_WR) ++ res->status |= NVC0_BUFFER_STATUS_DIRTY; ++ return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); ++} ++ ++static INLINE void ++BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s) ++{ ++ struct nouveau_subchannel *subc = &gr->channel->subc[s]; ++ ++ assert(s < 8); ++ if (subc->gr) { ++ assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT); ++ subc->gr->bound = NOUVEAU_GROBJ_UNBOUND; ++ } ++ subc->gr = gr; ++ subc->gr->subc = s; ++ subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; ++ ++ BEGIN_RING(chan, RING_GR(gr, 0x0000), 1); ++ OUT_RING (chan, gr->grclass); ++} ++ ++#endif +diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c +index e0e65e7..e2fadd3 100644 +--- a/src/gallium/drivers/nvfx/nv04_2d.c ++++ b/src/gallium/drivers/nvfx/nv04_2d.c +@@ -34,11 +34,11 @@ + #include + #include + #include +-#include + #include + #include + #include + #include ++#include + #include "nv04_2d.h" + + #include "nouveau/nv_object.xml.h" +diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c +index 951fb20..b609891 100644 +--- a/src/gallium/drivers/nvfx/nv30_fragtex.c ++++ b/src/gallium/drivers/nvfx/nv30_fragtex.c +@@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) + struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; + struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned txf; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + unsigned use_rect; +@@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) + txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 
2 : 0)]; + + MARK_RING(chan, 9, 2); +- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); ++ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); + OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, txf, + tex_flags | NOUVEAU_BO_OR, +diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c +index e8ab403..563183d 100644 +--- a/src/gallium/drivers/nvfx/nv40_fragtex.c ++++ b/src/gallium/drivers/nvfx/nv40_fragtex.c +@@ -76,6 +76,7 @@ void + nv40_fragtex_set(struct nvfx_context *nvfx, int unit) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; + struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; +@@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) + txf = sv->u.nv40.fmt[ps->compare] | ps->fmt; + + MARK_RING(chan, 11, 2); +- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); ++ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8); + OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); +@@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) + OUT_RING(chan, ps->filt | sv->filt); + OUT_RING(chan, sv->npot_size); + OUT_RING(chan, ps->bcol); +- OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1); + OUT_RING(chan, sv->u.nv40.npot_size2); + + nvfx->hw_txf[unit] = txf; +diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c +index 95834d2..6c8934d 100644 +--- a/src/gallium/drivers/nvfx/nvfx_context.c ++++ b/src/gallium/drivers/nvfx/nvfx_context.c +@@ -13,13 +13,13 @@ nvfx_flush(struct pipe_context 
*pipe, unsigned flags, + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + + /* XXX: we need to actually be intelligent here */ + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +- WAIT_RING(chan, 4); +- OUT_RING(chan, RING_3D(0x1fd8, 1)); ++ BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING(chan, 2); +- OUT_RING(chan, RING_3D(0x1fd8, 1)); ++ BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING(chan, 1); + } + +diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h +index 6ef2a69..2238aa1 100644 +--- a/src/gallium/drivers/nvfx/nvfx_context.h ++++ b/src/gallium/drivers/nvfx/nvfx_context.h +@@ -339,30 +339,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx); + /* nvfx_push.c */ + extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); + +-/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! 
*/ +-static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp) ++static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, ++ struct nouveau_grobj *eng3d, unsigned attrib, const float* v, ++ unsigned ncomp) + { + switch (ncomp) { + case 4: +- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + OUT_RING(chan, fui(v[3])); + break; + case 3: +- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + break; + case 2: +- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + break; + case 1: +- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1); + OUT_RING(chan, fui(v[0])); + break; + } +diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c +index 61f888a..81f1ec4 100644 +--- a/src/gallium/drivers/nvfx/nvfx_draw.c ++++ b/src/gallium/drivers/nvfx/nvfx_draw.c +@@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags) + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { +- assert(AVAIL_RING(chan) >= 2); +- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); + rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP; + } +@@ -46,6 +46,7 @@ 
nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + boolean no_elements = nvfx->vertprog->draw_no_elements; + unsigned num_attribs = nvfx->vertprog->draw_elements; + +@@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { +- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); + } + +@@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + int i; + for(i = 0; i < 32; ++i) + { +- OUT_RING(chan, RING_3D(0x1dac, 1)); ++ BEGIN_RING(chan, eng3d, 0x1dac, 1); + OUT_RING(chan, 0); + } + } + +- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1); + OUT_RING (chan, mode); + rs->prim = mode; + } + +- OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count)); + if(no_elements) { ++ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + } else { ++ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count); + for (unsigned i = 0; i < count; ++i) + { + struct vertex_header* v = prim->v[i]; +diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c +index 13e8bee..dbd7c77 100644 +--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c ++++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c +@@ -1189,12 +1189,12 @@ out_err: + static inline void + nvfx_fp_memcpy(void* dst, const void* src, size_t len) + { +-#ifndef WORDS_BIGENDIAN ++#ifndef PIPE_ARCH_BIG_ENDIAN + memcpy(dst, src, len); + #else + size_t i; + for(i = 0; i < len; i += 4) { +- 
uint32_t v = (uint32_t*)((char*)src + i); ++ uint32_t v = *(uint32_t*)((char*)src + i); + *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); + } + #endif +@@ -1233,6 +1233,7 @@ void + nvfx_fragprog_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; + struct nvfx_vertex_program* vp; + +@@ -1499,17 +1500,17 @@ update: + nvfx->hw_fragprog = fp; + + MARK_RING(chan, 8, 1); +- OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1); + OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); +- OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1); + OUT_RING(chan, fp->fp_control); + if(!nvfx->is_nv4x) { +- OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1); + OUT_RING(chan, (1<<16)|0x4); +- OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1); + OUT_RING(chan, fp->samplers); + } + } +@@ -1518,8 +1519,7 @@ update: + unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization; + if(pointsprite_control != nvfx->hw_pointsprite_control) + { +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1); + OUT_RING(chan, pointsprite_control); + nvfx->hw_pointsprite_control = pointsprite_control; + } +diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c +index fd0aff6..1c4901d 100644 +--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c ++++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c +@@ -177,6 +177,7 @@ void + 
nvfx_fragtex_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned samplers, unit; + + samplers = nvfx->dirty_samplers; +@@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) + else + nv40_fragtex_set(nvfx, unit); + } else { +- WAIT_RING(chan, 2); + /* this is OK for nv40 too */ +- OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1); + OUT_RING(chan, 0); + nvfx->hw_samplers &= ~(1 << unit); + } +diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c +index ebf47e6..6391741 100644 +--- a/src/gallium/drivers/nvfx/nvfx_push.c ++++ b/src/gallium/drivers/nvfx/nvfx_push.c +@@ -10,6 +10,7 @@ + + struct push_context { + struct nouveau_channel* chan; ++ struct nouveau_grobj *eng3d; + + void *idxbuf; + int32_t idxbias; +@@ -27,9 +28,10 @@ static void + emit_edgeflag(void *priv, boolean enabled) + { + struct push_context* ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + struct nouveau_channel *chan = ctx->chan; + +- OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1); + OUT_RING(chan, enabled ? 
1 : 0); + } + +@@ -37,6 +39,7 @@ static void + emit_vertices_lookup8(void *priv, unsigned start, unsigned count) + { + struct push_context *ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + uint8_t* elts = (uint8_t*)ctx->idxbuf + start; + + while(count) +@@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count) + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + +- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); ++ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); + ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + +@@ -57,6 +60,7 @@ static void + emit_vertices_lookup16(void *priv, unsigned start, unsigned count) + { + struct push_context *ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + uint16_t* elts = (uint16_t*)ctx->idxbuf + start; + + while(count) +@@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count) + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + +- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); ++ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); + ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + +@@ -77,6 +81,7 @@ static void + emit_vertices_lookup32(void *priv, unsigned start, unsigned count) + { + struct push_context *ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + uint32_t* elts = (uint32_t*)ctx->idxbuf + start; + + while(count) +@@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count) + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + +- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); ++ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); + ctx->translate->run_elts(ctx->translate, 
elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + +@@ -97,13 +102,14 @@ static void + emit_vertices(void *priv, unsigned start, unsigned count) + { + struct push_context *ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + +- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); ++ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length); + ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + +@@ -116,10 +122,11 @@ static void + emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) + { + struct push_context* ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + struct nouveau_channel *chan = ctx->chan; + unsigned nr = (vc & 0xff); + if (nr) { +- OUT_RING(chan, RING_3D(reg, 1)); ++ BEGIN_RING(chan, eng3d, reg, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } +@@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) + + nr -= push; + +- OUT_RING(chan, RING_3D_NI(reg, push)); ++ BEGIN_RING_NI(chan, eng3d, reg, push); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; +@@ -154,12 +161,13 @@ static INLINE void + emit_elt8(void* priv, unsigned start, unsigned vc) + { + struct push_context* ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + struct nouveau_channel *chan = ctx->chan; + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { +- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); + elts++; vc--; + } +@@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc) + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + +- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); ++ BEGIN_RING_NI(chan, eng3d, 
NV30_3D_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + +@@ -181,12 +189,13 @@ static INLINE void + emit_elt16(void* priv, unsigned start, unsigned vc) + { + struct push_context* ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + struct nouveau_channel *chan = ctx->chan; + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { +- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); + elts++; vc--; + } +@@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc) + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + +- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); ++ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + +@@ -208,6 +217,7 @@ static INLINE void + emit_elt32(void* priv, unsigned start, unsigned vc) + { + struct push_context* ctx = priv; ++ struct nouveau_grobj *eng3d = ctx->eng3d; + struct nouveau_channel *chan = ctx->chan; + uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; +@@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc) + while (vc) { + unsigned push = MIN2(vc, 2047); + +- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push)); +- assert(AVAIL_RING(chan) >= push); ++ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push); + if(idxbias) + { + for(unsigned i = 0; i < push; ++i) +@@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + { + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct push_context ctx; + struct util_split_prim s; + unsigned instances_left = info->instance_count; +@@ 
-251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + + 4; /* potential edgeflag enable/disable */ + + ctx.chan = nvfx->screen->base.channel; ++ ctx.eng3d = nvfx->screen->eng3d; + ctx.translate = nvfx->vtxelt->translate; + ctx.idxbuf = NULL; + ctx.vertex_length = nvfx->vtxelt->vertex_length; +@@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); + +- WAIT_RING(chan, 5); +- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); ++ nvfx_emit_vtx_attr(chan, eng3d, ++ nvfx->vtxelt->per_instance[i].base.idx, v, ++ nvfx->vtxelt->per_instance[i].base.ncomp); + } + + /* per-instance loop */ +@@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + int i; + for(i = 0; i < 32; ++i) + { +- OUT_RING(chan, RING_3D(0x1dac, 1)); ++ BEGIN_RING(chan, eng3d, ++ 0x1dac, 1); + OUT_RING(chan, 0); + } + } + +- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); ++ BEGIN_RING(chan, eng3d, ++ NV30_3D_VERTEX_BEGIN_END, 1); + OUT_RING(chan, hw_mode); + done = util_split_prim_next(&s, max_verts); +- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); ++ BEGIN_RING(chan, eng3d, ++ NV30_3D_VERTEX_BEGIN_END, 1); + OUT_RING(chan, 0); + + if(done) +@@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + per_instance[i].step = 0; + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); +- WAIT_RING(chan, 5); +- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); ++ nvfx_emit_vtx_attr(chan, eng3d, ++ nvfx->vtxelt->per_instance[i].base.idx, ++ v, ++ nvfx->vtxelt->per_instance[i].base.ncomp); + } + } + } +diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c +index 
3935ffd..3cd6bf1 100644 +--- a/src/gallium/drivers/nvfx/nvfx_query.c ++++ b/src/gallium/drivers/nvfx/nvfx_query.c +@@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) + struct nvfx_query *q = nvfx_query(pq); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + uint64_t tmp; + + assert(!nvfx->query); +@@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) + + nouveau_notifier_reset(nvfx->screen->query, q->object->start); + +- WAIT_RING(chan, 4); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1); + OUT_RING(chan, 1); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); + OUT_RING(chan, 1); + + q->ready = FALSE; +@@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) + { + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_query *q = nvfx_query(pq); + + assert(nvfx->query == pq); + +- WAIT_RING(chan, 4); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) | + ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT)); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); + OUT_RING(chan, 0); + FIRE_RING(chan); + +diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c +index 92e1d33..aa1e956 100644 +--- a/src/gallium/drivers/nvfx/nvfx_screen.c ++++ b/src/gallium/drivers/nvfx/nvfx_screen.c +@@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) + static void nv30_screen_init(struct nvfx_screen *screen) + { + struct nouveau_channel *chan = 
screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + int i; + + /* TODO: perhaps we should do some of this on nv40 too? */ + for (i=1; i<8; i++) { +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING(chan, 0); +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING(chan, 0); + } + +- OUT_RING(chan, RING_3D(0x220, 1)); ++ BEGIN_RING(chan, eng3d, 0x220, 1); + OUT_RING(chan, 1); + +- OUT_RING(chan, RING_3D(0x03b0, 1)); ++ BEGIN_RING(chan, eng3d, 0x03b0, 1); + OUT_RING(chan, 0x00100000); +- OUT_RING(chan, RING_3D(0x1454, 1)); ++ BEGIN_RING(chan, eng3d, 0x1454, 1); + OUT_RING(chan, 0); +- OUT_RING(chan, RING_3D(0x1d80, 1)); ++ BEGIN_RING(chan, eng3d, 0x1d80, 1); + OUT_RING(chan, 3); +- OUT_RING(chan, RING_3D(0x1450, 1)); ++ BEGIN_RING(chan, eng3d, 0x1450, 1); + OUT_RING(chan, 0x00030004); + + /* NEW */ +- OUT_RING(chan, RING_3D(0x1e98, 1)); ++ BEGIN_RING(chan, eng3d, 0x1e98, 1); + OUT_RING(chan, 0); +- OUT_RING(chan, RING_3D(0x17e0, 3)); ++ BEGIN_RING(chan, eng3d, 0x17e0, 3); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(1.0)); +- OUT_RING(chan, RING_3D(0x1f80, 16)); ++ BEGIN_RING(chan, eng3d, 0x1f80, 16); + for (i=0; i<16; i++) { + OUT_RING(chan, (i==8) ? 
0x0000ffff : 0); + } + +- OUT_RING(chan, RING_3D(0x120, 3)); ++ BEGIN_RING(chan, eng3d, 0x120, 3); + OUT_RING(chan, 0); + OUT_RING(chan, 1); + OUT_RING(chan, 2); + +- OUT_RING(chan, RING_3D(0x1d88, 1)); ++ BEGIN_RING(chan, eng3d, 0x1d88, 1); + OUT_RING(chan, 0x00001200); + +- OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1); + OUT_RING(chan, 0); + +- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(1.0)); + +- OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1); + OUT_RING(chan, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ +- OUT_RING(chan, RING_3D(0x1e94, 1)); ++ BEGIN_RING(chan, eng3d, 0x1e94, 1); + OUT_RING(chan, 0x13); + } + + static void nv40_screen_init(struct nvfx_screen *screen) + { + struct nouveau_channel *chan = screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + +- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2)); ++ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2); + OUT_RING(chan, screen->base.channel->vram->handle); + OUT_RING(chan, screen->base.channel->vram->handle); + +- OUT_RING(chan, RING_3D(0x1450, 1)); ++ BEGIN_RING(chan, eng3d, 0x1450, 1); + OUT_RING(chan, 0x00000004); + +- OUT_RING(chan, RING_3D(0x1ea4, 3)); ++ BEGIN_RING(chan, eng3d, 0x1ea4, 3); + OUT_RING(chan, 0x00000010); + OUT_RING(chan, 0x01000100); + OUT_RING(chan, 0xff800006); + + /* vtxprog output routing */ +- OUT_RING(chan, RING_3D(0x1fc4, 1)); ++ BEGIN_RING(chan, eng3d, 0x1fc4, 1); + OUT_RING(chan, 0x06144321); +- OUT_RING(chan, RING_3D(0x1fc8, 2)); ++ BEGIN_RING(chan, eng3d, 0x1fc8, 2); + OUT_RING(chan, 0xedcba987); + OUT_RING(chan, 0x0000006f); +- OUT_RING(chan, RING_3D(0x1fd0, 1)); ++ BEGIN_RING(chan, eng3d, 0x1fd0, 1); + OUT_RING(chan, 0x00171615); +- OUT_RING(chan, RING_3D(0x1fd4, 1)); ++ BEGIN_RING(chan, 
eng3d, 0x1fd4, 1); + OUT_RING(chan, 0x001b1a19); + +- OUT_RING(chan, RING_3D(0x1ef8, 1)); ++ BEGIN_RING(chan, eng3d, 0x1ef8, 1); + OUT_RING(chan, 0x0020ffff); +- OUT_RING(chan, RING_3D(0x1d64, 1)); ++ BEGIN_RING(chan, eng3d, 0x1d64, 1); + OUT_RING(chan, 0x01d300d4); +- OUT_RING(chan, RING_3D(0x1e94, 1)); ++ BEGIN_RING(chan, eng3d, 0x1e94, 1); + OUT_RING(chan, 0x00000001); + +- OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1); + OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN); + } + +@@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) + + /* Static eng3d initialisation */ + /* note that we just started using the channel, so we must have space in the pushbuffer */ +- OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1); + OUT_RING(chan, screen->sync->handle); +- OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->gart->handle); +- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1); + OUT_RING(chan, chan->vram->handle); +- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->vram->handle); +- OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->gart->handle); + +- OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2); + OUT_RING(chan, 0); + OUT_RING(chan, screen->query->handle); + +- OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); ++ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->vram->handle); + 
+diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c +index 5461903..f3dcb20 100644 +--- a/src/gallium/drivers/nvfx/nvfx_state.c ++++ b/src/gallium/drivers/nvfx/nvfx_state.c +@@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + { + struct nvfx_context *nvfx = nvfx_context(pipe); + +- nvfx->constbuf[shader] = buf; ++ pipe_resource_reference(&nvfx->constbuf[shader], buf); + nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; + + if (shader == PIPE_SHADER_VERTEX) { +diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c +index 501fdd4..40ae4f5 100644 +--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c ++++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c +@@ -7,11 +7,11 @@ void + nvfx_state_viewport_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_viewport_state *vpt = &nvfx->viewport; + +- WAIT_RING(chan, 11); + if(nvfx->render_mode == HW) { +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); + OUT_RINGf(chan, vpt->translate[0]); + OUT_RINGf(chan, vpt->translate[1]); + OUT_RINGf(chan, vpt->translate[2]); +@@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) + OUT_RINGf(chan, vpt->scale[1]); + OUT_RINGf(chan, vpt->scale[2]); + OUT_RINGf(chan, vpt->scale[3]); +- OUT_RING(chan, RING_3D(0x1d78, 1)); ++ BEGIN_RING(chan, eng3d, 0x1d78, 1); + OUT_RING(chan, 1); + } else { +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); +@@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); +- 
OUT_RING(chan, RING_3D(0x1d78, 1)); ++ BEGIN_RING(chan, eng3d, 0x1d78, 1); + OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1); + } + } +@@ -41,6 +41,7 @@ void + nvfx_state_scissor_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + +@@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx) + return; + nvfx->state.scissor_enabled = rast->scissor; + +- WAIT_RING(chan, 3); +- OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2); + if (nvfx->state.scissor_enabled) { + OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); +@@ -63,12 +63,12 @@ void + nvfx_state_sr_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + +- WAIT_RING(chan, 4); +- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1); + OUT_RING(chan, sr->ref_value[0]); +- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1); + OUT_RING(chan, sr->ref_value[1]); + } + +@@ -76,10 +76,10 @@ void + nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_blend_color *bcol = &nvfx->blend_colour; + +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1); + OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | +@@ -90,9 
+90,9 @@ void + nvfx_state_stipple_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + +- WAIT_RING(chan, 33); +- OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32)); ++ BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32); + OUT_RINGp(chan, nvfx->stipple, 32); + } + +@@ -100,12 +100,12 @@ static void + nvfx_coord_conventions_validate(struct nvfx_context* nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned value = nvfx->hw_fragprog->coord_conventions; + if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED) + value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT; + +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1); + OUT_RING(chan, value); + } + +@@ -113,6 +113,7 @@ static void + nvfx_ucp_validate(struct nvfx_context* nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned enables[7] = + { + 0, +@@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx) + + if(!nvfx->use_vp_clipping) + { +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); + OUT_RING(chan, 0); + +- WAIT_RING(chan, 6 * 4 + 1); +- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0), ++ nvfx->clip.nr * 4); + OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); + } + +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1); + OUT_RING(chan, enables[nvfx->clip.nr]); + } + +@@ -144,38 +143,37 @@ static void + nvfx_vertprog_ucp_validate(struct 
nvfx_context* nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned i; + struct nvfx_vertex_program* vp = nvfx->hw_vertprog; + if(nvfx->clip.nr != vp->clip_nr) + { + unsigned idx; +- WAIT_RING(chan, 14); + + /* remove last instruction bit */ + if(vp->clip_nr >= 0) + { + idx = vp->nr_insns - 7 + vp->clip_nr; +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); + OUT_RING(chan, vp->exec->start + idx); +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[idx].data, 4); + } + + /* set last instruction bit */ + idx = vp->nr_insns - 7 + nvfx->clip.nr; +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); + OUT_RING(chan, vp->exec->start + idx); +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); + OUT_RINGp(chan, vp->insns[idx].data, 3); + OUT_RING(chan, vp->insns[idx].data[3] | 1); + vp->clip_nr = nvfx->clip.nr; + } + + // TODO: only do this for the ones changed +- WAIT_RING(chan, 6 * 6); + for(i = 0; i < nvfx->clip.nr; ++i) + { +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); + OUT_RING(chan, vp->data->start + i); + OUT_RINGp (chan, nvfx->clip.ucp[i], 4); + } +@@ -185,6 +183,7 @@ static boolean + nvfx_state_validate_common(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned dirty; + unsigned still_dirty = 0; + int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ +@@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) + + if(vp_output != nvfx->hw_vp_output) + { +- WAIT_RING(chan, 2); +- OUT_RING(chan, 
RING_3D(NV40_3D_VP_RESULT_EN, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1); + OUT_RING(chan, vp_output); + nvfx->hw_vp_output = vp_output; + } +@@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) + + if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) + { +- WAIT_RING(chan, 3); +- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); + } +@@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) + // TODO: what about nv30? + if(nvfx->is_nv4x) + { +- WAIT_RING(chan, 4); +- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); + OUT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); + OUT_RING(chan, 1); + } + } +diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c +index 816bb89..f9fed94 100644 +--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c ++++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c +@@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) + { + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + uint32_t rt_enable, rt_format; + int i; + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; +@@ -204,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) + + //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); + +- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1); + OUT_RELOC(chan, rt0->bo, 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, 
chan->gart->handle); +- OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2); + OUT_RING(chan, pitch); + OUT_RELOC(chan, rt0->bo, + rt0->offset, rt_flags | NOUVEAU_BO_LOW, +@@ -216,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) + } + + if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) { +- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +- OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, + nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); +@@ -230,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) + if(nvfx->is_nv4x) + { + if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) { +- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, + nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); +- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1); + OUT_RING(chan, nvfx->hw_rt[2].pitch); + } + + if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) { +- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, + nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); 
+- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1); + OUT_RING(chan, nvfx->hw_rt[3].pitch); + } + } + + if (fb->zsbuf) { +- OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1); + OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +- OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1); + /* TODO: reverse engineer LMA */ + OUT_RELOC(chan, nvfx->hw_zeta.bo, + nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); + if(nvfx->is_nv4x) { +- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); + OUT_RING(chan, nvfx->hw_zeta.pitch); + } + } + else if(nvfx->is_nv4x) { +- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1); + OUT_RING(chan, 64); + } + +- OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1); + OUT_RING(chan, rt_enable); +- OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3)); ++ BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); + OUT_RING(chan, rt_format); +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING(chan, ((w - 1) << 16) | 0); + OUT_RING(chan, ((h - 1) << 16) | 0); + + if(!nvfx->is_nv4x) { + /* Wonder why this is needed, context should all be set to zero on init */ + /* TODO: we can most likely remove this, after putting it in context init */ +- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1); + OUT_RING(chan, 0); + } + nvfx->relocs_needed &=~ 
NVFX_RELOCATE_FRAMEBUFFER; +diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c +index 6fd6c47..be31853 100644 +--- a/src/gallium/drivers/nvfx/nvfx_surface.c ++++ b/src/gallium/drivers/nvfx/nvfx_surface.c +@@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy) + if(nvfx->query && !nvfx->blitters_in_use) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); + OUT_RING(chan, 0); + } + +@@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter) + if(nvfx->query && !nvfx->blitters_in_use) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; +- WAIT_RING(chan, 2); +- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; ++ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1); + OUT_RING(chan, 1); + } + } +diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c +index 597664e..01dacb4 100644 +--- a/src/gallium/drivers/nvfx/nvfx_vbo.c ++++ b/src/gallium/drivers/nvfx/nvfx_vbo.c +@@ -9,8 +9,7 @@ + #include "nvfx_resource.h" + + #include "nouveau/nouveau_channel.h" +- +-#include "nouveau/nouveau_pushbuf.h" ++#include "nouveau/nv04_pushbuf.h" + + static inline unsigned + util_guess_unique_indices_count(unsigned mode, unsigned indices) +@@ -247,6 +246,7 @@ boolean + nvfx_vbo_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; +@@ -262,11 +262,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) + struct nvfx_buffer* buffer = 
nvfx_buffer(vb->buffer); + float v[4]; + ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); +- nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); ++ nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp); + } + + +- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; +@@ -297,12 +297,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { +- OUT_RING(chan, RING_3D(0x1718, 1)); ++ BEGIN_RING(chan, eng3d, 0x1718, 1); + OUT_RING(chan, 0); + } + } + +- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; +@@ -330,7 +330,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) + OUT_RING(chan, 0); + } + +- OUT_RING(chan, RING_3D(0x1710, 1)); ++ BEGIN_RING(chan, eng3d, 0x1710, 1); + OUT_RING(chan, 0); + + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; +@@ -342,15 +342,14 @@ void + nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; ++ struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned num_outputs = nvfx->vertprog->draw_elements; + int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); + + if (!elements) + return; + +- WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); +- +- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements); + for(unsigned i = 0; i < num_outputs; ++i) + OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT); + for(unsigned i = num_outputs; i < elements; ++i) +@@ -360,16 +359,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { +- OUT_RING(chan, RING_3D(0x1718, 1)); ++ 
BEGIN_RING(chan, eng3d, 0x1718, 1); + OUT_RING(chan, 0); + } + } + +- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements); + for (unsigned i = 0; i < elements; i++) + OUT_RING(chan, 0); + +- OUT_RING(chan, RING_3D(0x1710, 1)); ++ BEGIN_RING(chan, eng3d, 0x1710, 1); + OUT_RING(chan, 0); + + nvfx->hw_vtxelt_nr = num_outputs; +@@ -592,18 +591,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + { + struct nvfx_context *nvfx = nvfx_context(pipe); + +- for(unsigned i = 0; i < count; ++i) +- { +- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); +- nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; +- nvfx->vtxbuf[i].max_index = vb[i].max_index; +- nvfx->vtxbuf[i].stride = vb[i].stride; +- } +- +- for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) +- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); ++ util_copy_vertex_buffers(nvfx->vtxbuf, ++ &nvfx->vtxbuf_nr, ++ vb, count); + +- nvfx->vtxbuf_nr = count; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; + } +diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c +index e543fda..a11941f 100644 +--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c ++++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c +@@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) + { + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; ++ struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; + struct nvfx_vertex_program* vp; + struct pipe_resource *constbuf; +@@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) + } + */ + +- WAIT_RING(chan, 6 * vp->nr_consts); + for (i = nvfx->use_vp_clipping ? 
6 : 0; i < vp->nr_consts; i++) { + struct nvfx_vertex_program_data *vpd = &vp->consts[i]; + +@@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) + + //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]); + +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5); + OUT_RING(chan, i + vp->data->start); + OUT_RINGp(chan, (uint32_t *)vpd->value, 4); + } +@@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) + + /* Upload vtxprog */ + if (upload_code) { +- WAIT_RING(chan, 2 + 5 * vp->nr_insns); +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1); + OUT_RING(chan, vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { +- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4); + //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); + OUT_RINGp(chan, vp->insns[i].data, 4); + } +@@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) + + if(nvfx->dirty & (NVFX_NEW_VERTPROG)) + { +- WAIT_RING(chan, 6); +- OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1)); ++ BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1); + OUT_RING(chan, vp->exec->start); + if(nvfx->is_nv4x) { +- OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1)); ++ BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1); + OUT_RING(chan, vp->ir); + } + } +diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile +index 2f64f31..eb1ee85 100644 +--- a/src/gallium/targets/dri-nouveau/Makefile ++++ b/src/gallium/targets/dri-nouveau/Makefile +@@ -10,6 +10,7 @@ PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ 
++ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a + + C_SOURCES = \ +diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile +index 2fcd9ff..5a2cdb1 100644 +--- a/src/gallium/targets/xorg-nouveau/Makefile ++++ b/src/gallium/targets/xorg-nouveau/Makefile +@@ -15,6 +15,7 @@ DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ ++ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a +diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +index d4bf124..648d6c8 100644 +--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c ++++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +@@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd) + case 0xa0: + init = nv50_screen_create; + break; ++ case 0xc0: ++ init = nvc0_screen_create; ++ break; + default: + debug_printf("%s: unknown chipset nv%02x\n", __func__, + dev->chipset); +diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c +index 2480b1e..988208f 100644 +--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c ++++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c +@@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti, + if (access & GL_MAP_WRITE_BIT) + flags |= NOUVEAU_BO_WR; + +- ret = nouveau_bo_map(s->bo, flags); +- assert(!ret); ++ if (!s->bo->map) { ++ ret = nouveau_bo_map(s->bo, flags); ++ assert(!ret); ++ } + + ti->Data = s->bo->map + y * s->pitch + x * s->cpp; + } +-- +1.7.3.4 + diff --git a/mesa-nouveau-libdrm-2_4_24.patch b/mesa-nouveau-libdrm-2_4_24.patch deleted file mode 100644 index 173fe36..0000000 --- 
a/mesa-nouveau-libdrm-2_4_24.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 5c102dd94f435e97507213fbd128e50dd15f5f54 Mon Sep 17 00:00:00 2001 -From: Ben Skeggs -Date: Mon, 20 Dec 2010 13:39:36 +1000 -Subject: [PATCH] nouveau: fix includes for latest libdrm - -Signed-off-by: Ben Skeggs ---- - src/gallium/drivers/nouveau/nouveau_winsys.h | 2 +- - src/gallium/drivers/nv50/nv50_surface.c | 2 +- - src/gallium/drivers/nvfx/nv04_2d.c | 2 +- - src/gallium/drivers/nvfx/nvfx_vbo.c | 3 +-- - src/mesa/drivers/dri/nouveau/nouveau_driver.h | 2 +- - 5 files changed, 5 insertions(+), 6 deletions(-) - -diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h -index ab480ca..747b084 100644 ---- a/src/gallium/drivers/nouveau/nouveau_winsys.h -+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h -@@ -10,7 +10,7 @@ - #include "nouveau/nouveau_grobj.h" - #include "nouveau/nouveau_notifier.h" - #include "nouveau/nouveau_resource.h" --#include "nouveau/nouveau_pushbuf.h" -+#include "nouveau/nv04_pushbuf.h" - - #ifndef NV04_PFIFO_MAX_PACKET_LEN - #define NV04_PFIFO_MAX_PACKET_LEN 2047 -diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c -index ce48022..a99df76 100644 ---- a/src/gallium/drivers/nv50/nv50_surface.c -+++ b/src/gallium/drivers/nv50/nv50_surface.c -@@ -22,7 +22,7 @@ - - #define __NOUVEAU_PUSH_H__ - #include --#include "nouveau/nouveau_pushbuf.h" -+#include "nouveau/nv04_pushbuf.h" - #include "nv50_context.h" - #include "nv50_resource.h" - #include "pipe/p_defines.h" -diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c -index e0e65e7..e2fadd3 100644 ---- a/src/gallium/drivers/nvfx/nv04_2d.c -+++ b/src/gallium/drivers/nvfx/nv04_2d.c -@@ -34,11 +34,11 @@ - #include - #include - #include --#include - #include - #include - #include - #include -+#include - #include "nv04_2d.h" - - #include "nouveau/nv_object.xml.h" -diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c 
b/src/gallium/drivers/nvfx/nvfx_vbo.c -index 597664e..339b317 100644 ---- a/src/gallium/drivers/nvfx/nvfx_vbo.c -+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c -@@ -9,8 +9,7 @@ - #include "nvfx_resource.h" - - #include "nouveau/nouveau_channel.h" -- --#include "nouveau/nouveau_pushbuf.h" -+#include "nouveau/nv04_pushbuf.h" - - static inline unsigned - util_guess_unique_indices_count(unsigned mode, unsigned indices) -diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h -index 8036b18..c5ac128 100644 ---- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h -+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h -@@ -38,7 +38,6 @@ - #include - - #include "nouveau_device.h" --#include "nouveau_pushbuf.h" - #include "nouveau_grobj.h" - #include "nouveau_channel.h" - #include "nouveau_bo.h" -@@ -46,6 +45,7 @@ - #include "nouveau_screen.h" - #include "nouveau_state.h" - #include "nouveau_surface.h" -+#include "nv04_pushbuf.h" - - #define DRIVER_DATE "20091015" - #define DRIVER_AUTHOR "Nouveau" --- -1.7.3.4 - diff --git a/mesa.spec b/mesa.spec index 129acba..ce2bee3 100644 --- a/mesa.spec +++ b/mesa.spec @@ -15,7 +15,7 @@ Summary: Mesa graphics libraries Name: mesa Version: 7.10 -Release: 0.22%{?dist} +Release: 0.23%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -34,11 +34,18 @@ Patch4: legacy-drivers.patch #Patch7: mesa-7.1-link-shared.patch Patch8: mesa-7.10-llvmcore.patch -Patch10: mesa-nouveau-libdrm-2_4_24.patch - Patch30: mesa-7.6-hush-vblank-warning.patch Patch31: mesa-7.10-swrastg.patch +# nouveau patches +# +# update nouveau gallium drivers to git as of 20110117, nvc0 support +Patch50: mesa-7.10-nouveau-updates.patch +# revert various bits to be compatible with 7.10 +Patch51: mesa-7.10-nouveau-revert.patch +# fixup classic drivers to new libdrm api +Patch52: mesa-7.10-nouveau-classic-libdrm.patch + BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} BuildRequires: 
kernel-headers >= 2.6.27-0.305.rc5.git6 @@ -221,9 +228,11 @@ Requires: Xorg %(xserver-sdk-abi-requires ansic) %(xserver-sdk-abi-requires vide %patch4 -p1 -b .classic #patch7 -p1 -b .dricore %patch8 -p1 -b .llvmcore -%patch10 -p1 -b .nv-libdrm %patch30 -p1 -b .vblank-warning #patch31 -p1 -b .swrastg +%patch50 -p1 -b .nv-update +%patch51 -p1 -b .nv-revert +%patch52 -p1 -b .nv-libdrm %build @@ -477,6 +486,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/libOSMesa.so %changelog +* Thu Jan 20 2011 Ben Skeggs 7.10-0.23 +- nouveau: nvc0 (fermi) backport + nv10/nv20 gnome-shell fixes + * Tue Jan 18 2011 Adam Jackson 7.10-0.22 - Add -dri-filesystem common subpackage for directory and COPYING - Add -dri-llvmcore subpackage and buildsystem hack diff --git a/nouveau-legacy-enable.patch b/nouveau-legacy-enable.patch deleted file mode 100644 index aaa889e..0000000 --- a/nouveau-legacy-enable.patch +++ /dev/null @@ -1,36 +0,0 @@ -diff -up mesa-20100529/configure.ac.nouveau mesa-20100529/configure.ac ---- mesa-20100529/configure.ac.nouveau 2010-05-29 18:57:30.000000000 +1000 -+++ mesa-20100529/configure.ac 2010-05-29 18:58:24.000000000 +1000 -@@ -820,20 +820,20 @@ if test "$mesa_driver" = dri; then - # x86-64 system where they could *ever* be used. - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i915 i965 mga r128 r200 r300 r600 radeon \ -- savage tdfx unichrome swrast" -+ savage tdfx unichrome swrast nouveau" - fi - ;; - powerpc*) - # Build only the drivers for cards that exist on PowerPC. - # At some point MGA will be added, but not yet. 
- if test "x$DRI_DIRS" = "xyes"; then -- DRI_DIRS="r128 r200 r300 r600 radeon tdfx swrast" -+ DRI_DIRS="r128 r200 r300 r600 radeon tdfx swrast nouveau" - fi - ;; - sparc*) - # Build only the drivers for cards that exist on sparc` - if test "x$DRI_DIRS" = "xyes"; then -- DRI_DIRS="r128 r200 r300 r600 radeon swrast" -+ DRI_DIRS="r128 r200 r300 r600 radeon swrast nouveau" - fi - ;; - esac -@@ -870,7 +870,7 @@ if test "$mesa_driver" = dri; then - # default drivers - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i810 i915 i965 mga r128 r200 r300 r600 radeon \ -- savage sis tdfx unichrome swrast" -+ savage sis tdfx unichrome swrast nouveau" - fi - - DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/ */ /g'`