/* * Copyright 2005 Nicolai Haehnle et al. * Copyright 2008 Advanced Micro Devices, Inc. * Copyright 2009 Jerome Glisse. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Nicolai Haehnle * Jerome Glisse
*/ #ifndef _R300_REG_H_ #define _R300_REG_H_
/* * This file contains registers and constants for the R300. They have been * found mostly by examining command buffers captured using glxtest, as well * as by extrapolating some known registers and constants from the R200. * I am fairly certain that they are correct unless stated otherwise * in comments.
*/
/* * Vertex Array Processing (VAP) Control * Stolen from r200 code from Christoph Brill (It's a guess!)
*/ #define R300_VAP_CNTL 0x2080
/* This register is written directly and also starts data section * in many 3d CP_PACKET3's
*/ #define R300_VAP_VF_CNTL 0x2084 # define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 # define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) # define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) # define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) # define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) # define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) # define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) # define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) # define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) # define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) # define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) # define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 /* State based - direct writes to registers trigger vertex
generation */ # define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) # define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) # define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) # define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
/* I don't think I saw these three used.. */ # define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 # define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 # define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
/* index size - when not set the indices are assumed to be 16 bit */ # define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) /* number of vertices */ # define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
/* Where do we get our vertex data? * * Vertex data either comes either from immediate mode registers or from * vertex arrays. * There appears to be no mixed mode (though we can force the pitch of * vertex arrays to 0, effectively reusing the same element over and over * again). * * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure * if these registers influence vertex array processing. * * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. * * In both cases, vertex attributes are then passed through INPUT_ROUTE. * * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data * into the vertex processor's input registers. * The first word routes the first input, the second word the second, etc. * The corresponding input is routed into the register with the given index. * The list is ended by a word with INPUT_ROUTE_END set. * * Always set COMPONENTS_4 in immediate mode.
*/
/* Notes: * - always set up to produce at least two attributes: * if vertex program uses only position, fglrx will set normal, too * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
*/ #define R300_VAP_INPUT_CNTL_0 0x2180 # define R300_INPUT_CNTL_0_COLOR 0x00000001 #define R300_VAP_INPUT_CNTL_1 0x2184 # define R300_INPUT_CNTL_POS 0x00000001 # define R300_INPUT_CNTL_NORMAL 0x00000002 # define R300_INPUT_CNTL_COLOR 0x00000004 # define R300_INPUT_CNTL_TC0 0x00000400 # define R300_INPUT_CNTL_TC1 0x00000800 # define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ # define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ # define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ # define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ # define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ # define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
/* gap */
/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 * are set to a swizzling bit pattern, other words are 0. * * In immediate mode, the pattern is always set to xyzw. In vertex array * mode, the swizzling pattern is e.g. used to set zw components in texture * coordinates with only tweo components.
*/ #define R300_VAP_INPUT_ROUTE_1_0 0x21E0 # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 # define R300_INPUT_ROUTE_SELECT_Z 2 # define R300_INPUT_ROUTE_SELECT_W 3 # define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 # define R300_INPUT_ROUTE_SELECT_MASK 7 # define R300_INPUT_ROUTE_X_SHIFT 0 # define R300_INPUT_ROUTE_Y_SHIFT 3 # define R300_INPUT_ROUTE_Z_SHIFT 6 # define R300_INPUT_ROUTE_W_SHIFT 9 # define R300_INPUT_ROUTE_ENABLE (15 << 12) #define R300_VAP_INPUT_ROUTE_1_1 0x21E4 #define R300_VAP_INPUT_ROUTE_1_2 0x21E8 #define R300_VAP_INPUT_ROUTE_1_3 0x21EC #define R300_VAP_INPUT_ROUTE_1_4 0x21F0 #define R300_VAP_INPUT_ROUTE_1_5 0x21F4 #define R300_VAP_INPUT_ROUTE_1_6 0x21F8 #define R300_VAP_INPUT_ROUTE_1_7 0x21FC
/* END: Vertex data assembly */
/* gap */
/* BEGIN: Upload vertex program and data */
/* * The programmable vertex shader unit has a memory bank of unknown size * that can be written to in 16 byte units by writing the address into * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). * * Pointers into the memory bank are always in multiples of 16 bytes. * * The memory bank is divided into areas with fixed meaning. * * Starting at address UPLOAD_PROGRAM: Vertex program instructions. * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), * whereas the difference between known addresses suggests size 512. * * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. * Native reported limits and the VPI layout suggest size 256, whereas * difference between known addresses suggests size 512. * * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the * floating point pointsize. The exact purpose of this state is uncertain, * as there is also the R300_RE_POINTSIZE register. * * Multiple vertex programs and parameter sets can be loaded at once, * which could explain the size discrepancy.
*/ #define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 # define R300_PVS_UPLOAD_PROGRAM 0x00000000 # define R300_PVS_UPLOAD_PARAMETERS 0x00000200 # define R300_PVS_UPLOAD_POINTSIZE 0x00000406
/* gap */
#define R300_VAP_PVS_UPLOAD_DATA 0x2208
/* END: Upload vertex program and data */
/* gap */
/* I do not know the purpose of this register. However, I do know that * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL * for normal rendering.
*/ #define R300_VAP_UNKNOWN_221C 0x221C # define R300_221C_NORMAL 0x00000000 # define R300_221C_CLEAR 0x0001C000
/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. * * This was determined by experimentation alone but I believe it is correct. * * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
*/ #define R300_VAP_CLIP_X_0 0x2220 #define R300_VAP_CLIP_X_1 0x2224 #define R300_VAP_CLIP_Y_0 0x2228 #define R300_VAP_CLIP_Y_1 0x2230
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between * rendering commands and overwriting vertex program parameters. * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and * avoids bugs caused by still running shaders reading bad data from memory.
*/ #define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
/* Absolutely no clue what this register is about. */ #define R300_VAP_UNKNOWN_2288 0x2288 # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
/* gap */
/* Addresses are relative to the vertex program instruction area of the * memory bank. PROGRAM_END points to the last instruction of the active * program * * The meaning of the two UNKNOWN fields is obviously not known. However, * experiments so far have shown that both *must* point to an instruction * inside the vertex program, otherwise the GPU locks up. * * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to * position takes place. * * Most likely this is used to ignore rest of the program in cases * where group of verts arent visible. For some reason this "section" * is sometimes accepted other instruction that have no relationship with * position calculations.
*/ #define R300_VAP_PVS_CNTL_1 0x22D0 # define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 # define R300_PVS_CNTL_1_POS_END_SHIFT 10 # define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 /* Addresses are relative the vertex program parameters area. */ #define R300_VAP_PVS_CNTL_2 0x22D4 # define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 # define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 #define R300_VAP_PVS_CNTL_3 0x22D8 # define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 # define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices
*/ #define R300_VAP_VTX_COLOR_R 0x2464 #define R300_VAP_VTX_COLOR_G 0x2468 #define R300_VAP_VTX_COLOR_B 0x246C #define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ #define R300_VAP_VTX_POS_0_Y_1 0x2494 #define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ #define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ #define R300_VAP_VTX_POS_0_Y_2 0x24A4 #define R300_VAP_VTX_POS_0_Z_2 0x24A8 /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct * and are here so we can use one register file instead of several * - Vladimir
*/ #define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 # define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) # define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)
#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 /* each of the following is 3 bits wide, specifies number
of components */ # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
/* UNK30 seems to enables point to quad transformation on textures * (or something closely related to that). * This bit is rather fatal at the time being due to lackings at pixel * shader side
*/ #define R300_GB_ENABLE 0x4008 # define R300_GB_POINT_STUFF_ENABLE (1<<0) # define R300_GB_LINE_STUFF_ENABLE (1<<1) # define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2) # define R300_GB_STENCIL_AUTO_ENABLE (1<<4) # define R300_GB_UNK31 (1<<31) /* each of the following is 2 bits wide */ #define R300_GB_TEX_REPLICATE 0 #define R300_GB_TEX_ST 1 #define R300_GB_TEX_STR 2 # define R300_GB_TEX0_SOURCE_SHIFT 16 # define R300_GB_TEX1_SOURCE_SHIFT 18 # define R300_GB_TEX2_SOURCE_SHIFT 20 # define R300_GB_TEX3_SOURCE_SHIFT 22 # define R300_GB_TEX4_SOURCE_SHIFT 24 # define R300_GB_TEX5_SOURCE_SHIFT 26 # define R300_GB_TEX6_SOURCE_SHIFT 28 # define R300_GB_TEX7_SOURCE_SHIFT 30
#define R300_GB_FIFO_SIZE 0x4024 /* each of the following is 2 bits wide */ #define R300_GB_FIFO_SIZE_32 0 #define R300_GB_FIFO_SIZE_64 1 #define R300_GB_FIFO_SIZE_128 2 #define R300_GB_FIFO_SIZE_256 3 # define R300_SC_IFIFO_SIZE_SHIFT 0 # define R300_SC_TZFIFO_SIZE_SHIFT 2 # define R300_SC_BFIFO_SIZE_SHIFT 4
# define R300_US_OFIFO_SIZE_SHIFT 12 # define R300_US_WFIFO_SIZE_SHIFT 14 /* the following use the same constants as above, but meaning is
is times 2 (i.e. instead of 32 words it means 64 */ # define R300_RS_TFIFO_SIZE_SHIFT 6 # define R300_RS_CFIFO_SIZE_SHIFT 8 # define R300_US_RAM_SIZE_SHIFT 10 /* watermarks, 3 bits wide */ # define R300_RS_HIGHWATER_COL_SHIFT 16 # define R300_RS_HIGHWATER_TEX_SHIFT 19 # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
/* The pointsize is given in multiples of 6. The pointsize can be * enormous: Clear() renders a single point that fills the entire * framebuffer.
*/ #define R300_RE_POINTSIZE 0x421C # define R300_POINTSIZE_Y_SHIFT 0 # define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ # define R300_POINTSIZE_X_SHIFT 16 # define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */ # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
/* The line width is given in multiples of 6. * In default mode lines are classified as vertical lines. * HO: horizontal * VE: vertical or horizontal * HO & VE: no classification
*/ #define R300_RE_LINE_CNT 0x4234 # define R300_LINESIZE_SHIFT 0 # define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ # define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6) # define R300_LINE_CNT_HO (1 << 16) # define R300_LINE_CNT_VE (1 << 17)
/* Some sort of scale or clamp value for texcoordless textures. */ #define R300_RE_UNK4238 0x4238
/* Something shade related */ #define R300_RE_SHADE 0x4274
/* Fog parameters */ #define R300_RE_FOG_SCALE 0x4294 #define R300_RE_FOG_START 0x4298
/* Not sure why there are duplicate of factor and constant values. * My best guess so far is that there are separate zbiases for test and write. * Ordering might be wrong. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders.
*/ #define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ #define R300_RE_ZBIAS_T_FACTOR 0x42A4 #define R300_RE_ZBIAS_T_CONSTANT 0x42A8 #define R300_RE_ZBIAS_W_FACTOR 0x42AC #define R300_RE_ZBIAS_W_CONSTANT 0x42B0
/* This register needs to be set to (1<<1) for RV350 to correctly * perform depth test (see --vb-triangles in r300_demo) * Don't know about other chips. - Vladimir * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. * My guess is that there are two bits for each zbias primitive * (FILL, LINE, POINT). * One to enable depth test and one for depth write. * Yet this doesn't explain why depth writes work ...
*/ #define R300_RE_OCCLUSION_CNTL 0x42B4 # define R300_OCCLUSION_ON (1<<1)
/* BEGIN: Rasterization / Interpolators - many guesses */
/* 0_UNKNOWN_18 has always been set except for clear operations. * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends * on the vertex program, *not* the fragment program)
*/ #define R300_RS_CNTL_0 0x4300 # define R300_RS_CNTL_TC_CNT_SHIFT 2 # define R300_RS_CNTL_TC_CNT_MASK (7 << 2) /* number of color interpolators used */ # define R300_RS_CNTL_CI_CNT_SHIFT 7 # define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n
register. */ #define R300_RS_CNTL_1 0x4304
/* gap */
/* Only used for texture coordinates. * Use the source field to route texture coordinate input from the * vertex program to the desired interpolator. Note that the source * field is relative to the outputs the vertex program *actually* * writes. If a vertex program only writes texcoord[1], this will * be source index 0. * Set INTERP_USED on all interpolators that produce data used by * the fragment program. INTERP_USED looks like a swizzling mask, * but I haven't seen it used that way. * * Note: The _UNKNOWN constants are always set in their respective * register. I don't know if this is necessary.
*/ #define R300_RS_INTERP_0 0x4310 #define R300_RS_INTERP_1 0x4314 # define R300_RS_INTERP_1_UNKNOWN 0x40 #define R300_RS_INTERP_2 0x4318 # define R300_RS_INTERP_2_UNKNOWN 0x80 #define R300_RS_INTERP_3 0x431C # define R300_RS_INTERP_3_UNKNOWN 0xC0 #define R300_RS_INTERP_4 0x4320 #define R300_RS_INTERP_5 0x4324 #define R300_RS_INTERP_6 0x4328 #define R300_RS_INTERP_7 0x432C # define R300_RS_INTERP_SRC_SHIFT 2 # define R300_RS_INTERP_SRC_MASK (7 << 2) # define R300_RS_INTERP_USED 0x00D10000
/* Special handling for color: When the fragment program uses color, * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the * color register index. * * Apperently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state. * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly * correct or not. - Oliver.
*/ # define R300_RS_ROUTE_0_COLOR (1 << 14) # define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 # define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ /* As above, but for secondary color */ # define R300_RS_ROUTE_1_COLOR1 (1 << 14) # define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 # define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) # define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) /* END: Rasterization / Interpolators - many guesses */
/* There are four clipping rectangles. Their corner coordinates are inclusive. * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending * on whether the pixel is inside cliprects 0-3, respectively. For example, * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned * the number 3 (binary 0011). * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, * the pixel is rasterized. * * In addition to this, there is a scissors rectangle. Only pixels inside the * scissors rectangle are drawn. (coordinates are inclusive) * * For some reason, the top-left corner of the framebuffer is at (1440, 1440) * for the purpose of clipping and scissors.
*/ #define R300_RE_CLIPRECT_TL_0 0x43B0 #define R300_RE_CLIPRECT_BR_0 0x43B4 #define R300_RE_CLIPRECT_TL_1 0x43B8 #define R300_RE_CLIPRECT_BR_1 0x43BC #define R300_RE_CLIPRECT_TL_2 0x43C0 #define R300_RE_CLIPRECT_BR_2 0x43C4 #define R300_RE_CLIPRECT_TL_3 0x43C8 #define R300_RE_CLIPRECT_BR_3 0x43CC # define R300_CLIPRECT_OFFSET 1440 # define R300_CLIPRECT_MASK 0x1FFF # define R300_CLIPRECT_X_SHIFT 0 # define R300_CLIPRECT_X_MASK (0x1FFF << 0) # define R300_CLIPRECT_Y_SHIFT 13 # define R300_CLIPRECT_Y_MASK (0x1FFF << 13) #define R300_RE_CLIPRECT_CNTL 0x43D0 # define R300_CLIP_OUT (1 << 0) # define R300_CLIP_0 (1 << 1) # define R300_CLIP_1 (1 << 2) # define R300_CLIP_10 (1 << 3) # define R300_CLIP_2 (1 << 4) # define R300_CLIP_20 (1 << 5) # define R300_CLIP_21 (1 << 6) # define R300_CLIP_210 (1 << 7) # define R300_CLIP_3 (1 << 8) # define R300_CLIP_30 (1 << 9) # define R300_CLIP_31 (1 << 10) # define R300_CLIP_310 (1 << 11) # define R300_CLIP_32 (1 << 12) # define R300_CLIP_320 (1 << 13) # define R300_CLIP_321 (1 << 14) # define R300_CLIP_3210 (1 << 15)
#define R300_TX_SIZE_0 0x4480 # define R300_TX_WIDTHMASK_SHIFT 0 # define R300_TX_WIDTHMASK_MASK (2047 << 0) # define R300_TX_HEIGHTMASK_SHIFT 11 # define R300_TX_HEIGHTMASK_MASK (2047 << 11) # define R300_TX_UNK23 (1 << 23) # define R300_TX_MAX_MIP_LEVEL_SHIFT 26 # define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1<<30) # define R300_TX_SIZE_TXPITCH_EN (1<<31) #define R300_TX_FORMAT_0 0x44C0 /* The interpretation of the format word by Wladimir van der Laan */ /* The X, Y, Z and W refer to the layout of the components. They are given meanings as R, G, B and Alpha by the swizzle
specification */ # define R300_TX_FORMAT_X8 0x0 # define R300_TX_FORMAT_X16 0x1 # define R300_TX_FORMAT_Y4X4 0x2 # define R300_TX_FORMAT_Y8X8 0x3 # define R300_TX_FORMAT_Y16X16 0x4 # define R300_TX_FORMAT_Z3Y3X2 0x5 # define R300_TX_FORMAT_Z5Y6X5 0x6 # define R300_TX_FORMAT_Z6Y5X5 0x7 # define R300_TX_FORMAT_Z11Y11X10 0x8 # define R300_TX_FORMAT_Z10Y11X11 0x9 # define R300_TX_FORMAT_W4Z4Y4X4 0xA # define R300_TX_FORMAT_W1Z5Y5X5 0xB # define R300_TX_FORMAT_W8Z8Y8X8 0xC # define R300_TX_FORMAT_W2Z10Y10X10 0xD # define R300_TX_FORMAT_W16Z16Y16X16 0xE # define R300_TX_FORMAT_DXT1 0xF # define R300_TX_FORMAT_DXT3 0x10 # define R300_TX_FORMAT_DXT5 0x11 # define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ # define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ # define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ # define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ /* 0x16 - some 16 bit green format.. ?? */ # define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ # define R300_TX_FORMAT_CUBIC_MAP (1 << 26)
/* gap */ /* Floating point formats */ /* Note - hardware supports both 16 and 32 bit floating point */ # define R300_TX_FORMAT_FL_I16 0x18 # define R300_TX_FORMAT_FL_I16A16 0x19 # define R300_TX_FORMAT_FL_R16G16B16A16 0x1A # define R300_TX_FORMAT_FL_I32 0x1B # define R300_TX_FORMAT_FL_I32A32 0x1C # define R300_TX_FORMAT_FL_R32G32B32A32 0x1D # define R300_TX_FORMAT_ATI2N 0x1F /* alpha modes, convenience mostly */ /* if you have alpha, pick constant appropriate to the
number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ # define R300_TX_FORMAT_ALPHA_1CH 0x000 # define R300_TX_FORMAT_ALPHA_2CH 0x200 # define R300_TX_FORMAT_ALPHA_4CH 0x600 # define R300_TX_FORMAT_ALPHA_NONE 0xA00 /* Swizzling */ /* constants */ # define R300_TX_FORMAT_X 0 # define R300_TX_FORMAT_Y 1 # define R300_TX_FORMAT_Z 2 # define R300_TX_FORMAT_W 3 # define R300_TX_FORMAT_ZERO 4 # define R300_TX_FORMAT_ONE 5 /* 2.0*Z, everything above 1.0 is set to 0.0 */ # define R300_TX_FORMAT_CUT_Z 6 /* 2.0*W, everything above 1.0 is set to 0.0 */ # define R300_TX_FORMAT_CUT_W 7
# define R300_TX_FORMAT_B_SHIFT 18 # define R300_TX_FORMAT_G_SHIFT 15 # define R300_TX_FORMAT_R_SHIFT 12 # define R300_TX_FORMAT_A_SHIFT 9 /* Convenience macro to take care of layout and swizzling */ # define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
| (R300_TX_FORMAT_##FMT) \
) /* These can be ORed with result of R300_EASY_TX_FORMAT() We don't really know what they do. Take values from a
constant color ? */ # define R300_TX_FORMAT_CONST_X (1<<5) # define R300_TX_FORMAT_CONST_Y (2<<5) # define R300_TX_FORMAT_CONST_Z (4<<5) # define R300_TX_FORMAT_CONST_W (8<<5)
/* Fragment programs are written directly into register space. * There are separate instruction streams for texture instructions and ALU * instructions. * In order to synchronize these streams, the program is divided into up * to 4 nodes. Each node begins with a number of TEX operations, followed * by a number of ALU operations. * The first node can have zero TEX ops, all subsequent nodes must have at * least * one TEX ops. * All nodes must have at least one ALU op. * * The index of the last node is stored in PFS_CNTL_0: A value of 0 means * 1 node, a value of 3 means 4 nodes. * The total amount of instructions is defined in PFS_CNTL_2. The offsets are * offsets into the respective instruction streams, while *_END points to the * last instruction relative to this offset.
*/ #define R300_PFS_CNTL_0 0x4600 # define R300_PFS_CNTL_LAST_NODES_SHIFT 0 # define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) # define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) #define R300_PFS_CNTL_1 0x4604 /* There is an unshifted value here which has so far always been equal to the * index of the highest used temporary register.
*/ #define R300_PFS_CNTL_2 0x4608 # define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 # define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_CNTL_ALU_END_SHIFT 6 # define R300_PFS_CNTL_ALU_END_MASK (63 << 6) # define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */
/* gap */
/* Nodes are stored backwards. The last active node is always stored in * PFS_NODE_3. * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The * first node is stored in NODE_2, the second node is stored in NODE_3. * * Offsets are relative to the master offset from PFS_CNTL_2.
*/ #define R300_PFS_NODE_0 0x4610 #define R300_PFS_NODE_1 0x4614 #define R300_PFS_NODE_2 0x4618 #define R300_PFS_NODE_3 0x461C # define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 # define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_NODE_ALU_END_SHIFT 6 # define R300_PFS_NODE_ALU_END_MASK (63 << 6) # define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 # define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) # define R300_PFS_NODE_TEX_END_SHIFT 17 # define R300_PFS_NODE_TEX_END_MASK (31 << 17) # define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) # define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23)
/* TEX * As far as I can tell, texture instructions cannot write into output * registers directly. A subsequent ALU instruction is always necessary, * even if it's just MAD o0, r0, 1, 0
*/ #define R300_PFS_TEXI_0 0x4620 # define R300_FPITX_SRC_SHIFT 0 # define R300_FPITX_SRC_MASK (31 << 0) /* GUESS */ # define R300_FPITX_SRC_CONST (1 << 5) # define R300_FPITX_DST_SHIFT 6 # define R300_FPITX_DST_MASK (31 << 6) # define R300_FPITX_IMAGE_SHIFT 11 /* GUESS based on layout and native limits */ # define R300_FPITX_IMAGE_MASK (15 << 11) /* Unsure if these are opcodes, or some kind of bitfield, but this is how * they were set when I checked
*/ # define R300_FPITX_OPCODE_SHIFT 15 # define R300_FPITX_OP_TEX 1 # define R300_FPITX_OP_KIL 2 # define R300_FPITX_OP_TXP 3 # define R300_FPITX_OP_TXB 4 # define R300_FPITX_OPCODE_MASK (7 << 15)
/* ALU * The ALU instructions register blocks are enumerated according to the order * in which fglrx. I assume there is space for 64 instructions, since * each block has space for a maximum of 64 DWORDs, and this matches reported * native limits. * * The basic functional block seems to be one MAD for each color and alpha, * and an adder that adds all components after the MUL. * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands * - DP4: Use OUTC_DP4, OUTA_DP4 * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 * - CMP: If ARG2 < 0, return ARG1, else return ARG0 * - FLR: use FRC+MAD * - XPD: use MAD+MAD * - SGE, SLT: use MAD+CMP * - RSQ: use ABS modifier for argument * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation * (e.g. RCP) into color register * - apparently, there's no quick DST operation * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" * * Operand selection * First stage selects three sources from the available registers and * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). * fglrx sorts the three source fields: Registers before constants, * lower indices before higher indices; I do not know whether this is * necessary. * * fglrx fills unused sources with "read constant 0" * According to specs, you cannot select more than two different constants. * * Second stage selects the operands from the sources. This is defined in * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants * zero and one. * Swizzling and negation happens in this stage, as well. * * Important: Color and alpha seem to be mostly separate, i.e. their sources * selection appears to be fully independent (the register storage is probably * physically split into a color and an alpha section). * However (because of the apparent physical split), there is some interaction * WRT swizzling. If, for example, you want to load an R component into an * Alpha operand, this R component is taken from a *color* source, not from * an alpha source. The corresponding register doesn't even have to appear in * the alpha sources list. (I hope this all makes sense to you) * * Destination selection * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * * Note: There is a special form for LRP * - Argument order is the same as in ARB_fragment_program. * - Operation is MAD * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP * - Set FPI0/FPI2_SPECIAL_LRP * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
*/ #define R300_PFS_INSTR1_0 0x46C0 # define R300_FPI1_SRC0C_SHIFT 0 # define R300_FPI1_SRC0C_MASK (31 << 0) # define R300_FPI1_SRC0C_CONST (1 << 5) # define R300_FPI1_SRC1C_SHIFT 6 # define R300_FPI1_SRC1C_MASK (31 << 6) # define R300_FPI1_SRC1C_CONST (1 << 11) # define R300_FPI1_SRC2C_SHIFT 12 # define R300_FPI1_SRC2C_MASK (31 << 12) # define R300_FPI1_SRC2C_CONST (1 << 17) # define R300_FPI1_SRC_MASK 0x0003ffff # define R300_FPI1_DSTC_SHIFT 18 # define R300_FPI1_DSTC_MASK (31 << 18) # define R300_FPI1_DSTC_REG_MASK_SHIFT 23 # define R300_FPI1_DSTC_REG_X (1 << 23) # define R300_FPI1_DSTC_REG_Y (1 << 24) # define R300_FPI1_DSTC_REG_Z (1 << 25) # define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 # define R300_FPI1_DSTC_OUTPUT_X (1 << 26) # define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) # define R300_FPI1_DSTC_OUTPUT_Z (1 << 28)
/* Guess by Vladimir. * Set to 0A before 3D operations, set to 02 afterwards.
*/ /*#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C*/ # define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 # define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A
/* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS * for this. * Bit (1<<8) is the "test" bit. so plain write is 6 - vd
*/ #define R300_ZB_CNTL 0x4F00 # define R300_STENCIL_ENABLE (1 << 0) # define R300_Z_ENABLE (1 << 1) # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4)
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.