/* * Copyright 2008 Advanced Micro Devices, Inc. * Copyright 2008 Red Hat Inc. * Copyright 2009 Jerome Glisse. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Dave Airlie * Alex Deucher * Jerome Glisse
*/
/**
 * r100_wait_for_vblank - vblank wait asic callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to wait for vblank on
 *
 * Busy-wait for the start of a vblank period on the requested crtc
 * (r1xx-r4xx).  Returns immediately when @crtc is not below
 * rdev->num_crtc or when the enable bit of that crtc is clear.
 */
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
{
	unsigned polls = 0;
	uint32_t enabled;

	if (crtc >= rdev->num_crtc)
		return;

	/* crtc 0 uses the CRTC registers, any other index the CRTC2 ones. */
	if (crtc == 0)
		enabled = RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN;
	else
		enabled = RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN;
	if (!enabled)
		return;

	/*
	 * Depending on when we hit vblank, we may be close to active; if so,
	 * wait for another frame: first let the current vblank run out ...
	 */
	while (r100_is_in_vblank(rdev, crtc)) {
		/*
		 * On every 100th poll (counted across both loops), bail out if
		 * r100_is_counter_moving() reports no progress.
		 */
		if ((polls++ % 100 == 0) && !r100_is_counter_moving(rdev, crtc))
			break;
	}

	/* ... then spin until the next vblank begins. */
	while (!r100_is_in_vblank(rdev, crtc)) {
		if ((polls++ % 100 == 0) && !r100_is_counter_moving(rdev, crtc))
			break;
	}
}
/**
 * r100_page_flip - pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to cleanup pageflip on
 * @crtc_base: new address of the crtc (GPU MC address)
 * @async: asynchronous flip
 *
 * Does the actual pageflip (r1xx-r4xx).
 * During vblank we take the crtc lock and wait for the update_pending
 * bit to go high, when it does, we release the lock, and allow the
 * double buffered update to take place.
 *
 * @async is not referenced by the code visible in this block.
 */
void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
{ struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
/* NOTE(review): crtc_pitch, pitch_pixels and fb are never used below -
 * confirm whether a pitch update step is missing from this function.
 */
uint32_t crtc_pitch, pitch_pixels; struct drm_framebuffer *fb = radeon_crtc->base.primary->fb;
/* Only the low 32 bits of crtc_base are kept; the offset-lock bit is OR-ed in. */
u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK; int i;
/* Lock the graphics update lock */ /* update the scanout addresses */
WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
/* Wait for update_pending to go high. */
/* Polls the offset register, 1 us apart, for at most rdev->usec_timeout iterations. */
for (i = 0; i < rdev->usec_timeout; i++) { if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET) break;
udelay(1);
}
/* NOTE(review): this message is printed even when the loop above ran to the
 * timeout without seeing the bit set.
 */
DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
/* Unlock the lock, so double-buffering can take place inside vblank */
tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
}
/** * r100_page_flip_pending - check if page flip is still pending * * @rdev: radeon_device pointer * @crtc_id: crtc to check * * Check if the last pageflip is still pending (r1xx-r4xx). * Returns the current update pending status.
*/ bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
{ struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
/** * r100_pm_get_dynpm_state - look up dynpm power state callback. * * @rdev: radeon_device pointer * * Look up the optimal power state based on the * current state of the GPU (r1xx-r5xx). * Used for dynpm only.
*/ void r100_pm_get_dynpm_state(struct radeon_device *rdev)
{ int i;
rdev->pm.dynpm_can_upclock = true;
rdev->pm.dynpm_can_downclock = true;
switch (rdev->pm.dynpm_planned_action) { case DYNPM_ACTION_MINIMUM:
rdev->pm.requested_power_state_index = 0;
rdev->pm.dynpm_can_downclock = false; break; case DYNPM_ACTION_DOWNCLOCK: if (rdev->pm.current_power_state_index == 0) {
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
rdev->pm.dynpm_can_downclock = false;
} else { if (rdev->pm.active_crtc_count > 1) { for (i = 0; i < rdev->pm.num_power_states; i++) { if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) continue; elseif (i >= rdev->pm.current_power_state_index) {
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; break;
} else {
rdev->pm.requested_power_state_index = i; break;
}
}
} else
rdev->pm.requested_power_state_index =
rdev->pm.current_power_state_index - 1;
} /* don't use the power state if crtcs are active and no display flag is set */ if ((rdev->pm.active_crtc_count > 0) &&
(rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
RADEON_PM_MODE_NO_DISPLAY)) {
rdev->pm.requested_power_state_index++;
} break; case DYNPM_ACTION_UPCLOCK: if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
rdev->pm.dynpm_can_upclock = false;
} else { if (rdev->pm.active_crtc_count > 1) { for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) { if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) continue; elseif (i <= rdev->pm.current_power_state_index) {
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; break;
} else {
rdev->pm.requested_power_state_index = i; break;
}
}
} else
rdev->pm.requested_power_state_index =
rdev->pm.current_power_state_index + 1;
} break; case DYNPM_ACTION_DEFAULT:
rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
rdev->pm.dynpm_can_upclock = false; break; case DYNPM_ACTION_NONE: default:
DRM_ERROR("Requested mode for not defined action\n"); return;
} /* only one clock mode per power state */
rdev->pm.requested_clock_mode_index = 0;
/*
 * PCI GART
 */

/**
 * r100_pci_gart_tlb_flush - PCI GART TLB flush callback.
 *
 * @rdev: radeon_device pointer
 *
 * Currently a no-op: the body contains no statements.
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/*
	 * TODO: can we do something here?
	 * The hardware seems to cache only a single entry, so that entry
	 * should be discarded; otherwise, if the first GPU GART read hits
	 * it, the access could end up at the wrong address.
	 */
}
int r100_pci_gart_init(struct radeon_device *rdev)
{ int r;
/* Load the microcode for the CP */ staticint r100_cp_init_microcode(struct radeon_device *rdev)
{ constchar *fw_name = NULL; int err;
DRM_DEBUG_KMS("\n");
switch (rdev->family) { case CHIP_R100: case CHIP_RV100: case CHIP_RV200: case CHIP_RS100: case CHIP_RS200:
DRM_INFO("Loading R100 Microcode\n");
fw_name = FIRMWARE_R100; break;
case CHIP_R200: case CHIP_RV250: case CHIP_RV280: case CHIP_RS300:
DRM_INFO("Loading R200 Microcode\n");
fw_name = FIRMWARE_R200; break;
case CHIP_R300: case CHIP_R350: case CHIP_RV350: case CHIP_RV380: case CHIP_RS400: case CHIP_RS480:
DRM_INFO("Loading R300 Microcode\n");
fw_name = FIRMWARE_R300; break;
case CHIP_R420: case CHIP_R423: case CHIP_RV410:
DRM_INFO("Loading R400 Microcode\n");
fw_name = FIRMWARE_R420; break;
case CHIP_RS690: case CHIP_RS740:
DRM_INFO("Loading RS690/RS740 Microcode\n");
fw_name = FIRMWARE_RS690; break;
case CHIP_RS600:
DRM_INFO("Loading RS600 Microcode\n");
fw_name = FIRMWARE_RS600; break;
case CHIP_RV515: case CHIP_R520: case CHIP_RV530: case CHIP_R580: case CHIP_RV560: case CHIP_RV570:
DRM_INFO("Loading R500 Microcode\n");
fw_name = FIRMWARE_R520; break;
default:
DRM_ERROR("Unsupported Radeon family %u\n", rdev->family); return -EINVAL;
}
r100_debugfs_cp_init(rdev); if (!rdev->me_fw) {
r = r100_cp_init_microcode(rdev); if (r) {
DRM_ERROR("Failed to load firmware!\n"); return r;
}
}
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 8);
ring_size = (1 << (rb_bufsz + 1)) * 4;
r100_cp_load_microcode(rdev);
r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
RADEON_CP_PACKET2); if (r) { return r;
} /* Each time the cp read 1024 bytes (16 dword/quadword) update
* the rptr copy in system ram */
rb_blksz = 9; /* cp will read 128bytes at a time (4 dwords) */
max_fetch = 1;
ring->align_mask = 16 - 1; /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
pre_write_timer = 64; /* Force CP_RB_WPTR write if written more than one time before the * delay expire
*/
pre_write_limit = 0; /* Setup the cp cache like this (cache size is 96 dwords) : * RING 0 to 15 * INDIRECT1 16 to 79 * INDIRECT2 80 to 95 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords)) * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) * Idea being that most of the gpu cmd will be through indirect1 buffer * so it gets the bigger cache.
*/
indirect2_start = 80;
indirect1_start = 16; /* cp setup */
WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
REG_SET(RADEON_MAX_FETCH, max_fetch)); #ifdef __BIG_ENDIAN
tmp |= RADEON_BUF_SWAP_32BIT; #endif
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
/* Set ring address */
DRM_INFO("radeon: ring at 0x%016lX\n", (unsignedlong)ring->gpu_addr);
WREG32(RADEON_CP_RB_BASE, ring->gpu_addr); /* Force read & write ptr to 0 */
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
WREG32(RADEON_CP_RB_RPTR_WR, 0);
ring->wptr = 0;
WREG32(RADEON_CP_RB_WPTR, ring->wptr);
/* set the wb address whether it's enabled or not */
WREG32(R_00070C_CP_RB_RPTR_ADDR,
S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
WREG32(RADEON_CP_RB_CNTL, tmp);
udelay(10); /* Set cp mode to bus mastering & enable cp*/
WREG32(RADEON_CP_CSQ_MODE,
REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
REG_SET(RADEON_INDIRECT1_START, indirect1_start));
WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
/* at this point everything should be setup correctly to enable master */
pci_set_master(rdev->pdev);
radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); if (r) {
DRM_ERROR("radeon: cp isn't working (%d).\n", r); return r;
}
ring->ready = true;
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
if (!ring->rptr_save_reg /* not resuming from suspend */
&& radeon_ring_supports_scratch_reg(rdev, ring)) {
r = radeon_scratch_get(rdev, &ring->rptr_save_reg); if (r) {
DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
ring->rptr_save_reg = 0;
}
} return 0;
}
/**
 * r100_cp_fini - tear down the CP.
 *
 * @rdev: radeon_device pointer
 *
 * Waits for the CP to go idle (only logging an error when
 * r100_cp_wait_for_idle() returns non-zero), disables the CP, releases the
 * GFX ring's rptr save scratch register and finalizes the GFX ring.
 */
void r100_cp_fini(struct radeon_device *rdev)
{
	/* A failed idle wait is reported but does not stop the teardown. */
	if (r100_cp_wait_for_idle(rdev) != 0)
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");

	/* Disable ring */
	r100_cp_disable(rdev);

	radeon_scratch_free(rdev,
			    rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);

	DRM_INFO("radeon: cp finalized\n");
}
/**
 * r100_cp_disable - stop the CP.
 *
 * @rdev: radeon_device pointer
 *
 * Sets the active VRAM size to the visible VRAM size, marks the GFX ring
 * as not ready, zeroes CP_CSQ_MODE, CP_CSQ_CNTL and SCRATCH_UMSK, and then
 * waits for the GUI to go idle, warning if that wait fails.
 */
void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;

	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(R_000770_SCRATCH_UMSK, 0);

	if (r100_gui_wait_for_idle(rdev) == 0)
		return;

	pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
}
/**
 * r100_cs_parse_packet0() - walk the registers written by a PACKET0
 * @p: parser structure holding parsing context
 * @pkt: the decoded PACKET0
 * @auth: register bitmap; entry (reg >> 7) holds one bit per dword register,
 *        bit ((reg >> 2) & 31), i.e. 32 registers per entry
 * @n: number of entries in @auth
 * @check: callback invoked for every register whose bit is set in @auth
 *
 * For a one_reg_wr packet every value goes to the same register and the walk
 * stops at the first value if that register's bit is clear; otherwise the
 * packet covers pkt->count + 1 consecutive dword registers.
 *
 * Return: 0 on success, -EINVAL if the packet reaches a register that has no
 * entry in @auth, or the first non-zero value returned by @check.
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg, last_reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;

	/*
	 * Check that the registers fall into the range covered by the n
	 * entries of the bitmap.  Valid indices are 0 .. n - 1, so an index
	 * equal to n must be rejected as well.
	 */
	if (pkt->one_reg_wr)
		last_reg = reg;
	else
		last_reg = reg + (pkt->count << 2);
	if ((last_reg >> 7) >= n)
		return -EINVAL;

	for (i = 0; i <= pkt->count; i++, idx++) {
		j = reg >> 7;
		/* unsigned constant: bit 31 must not shift into a sign bit */
		m = 1U << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r)
				return r;
		} else if (pkt->one_reg_wr) {
			/* same unchecked register for every remaining value */
			break;
		}
		if (!pkt->one_reg_wr)
			reg += 4;
	}
	return 0;
}
/** * r100_cs_packet_parse_vline() - parse userspace VLINE packet * @p: parser structure holding parsing context. * * Userspace sends a special sequence for VLINE waits. * PACKET0 - VLINE_START_END + value * PACKET0 - WAIT_UNTIL +_value * RELOC (P3) - crtc_id in reloc. * * This function parses this and relocates the VLINE START END * and WAIT UNTIL packets to the correct crtc. * It also detects a switched off crtc and nulls out the * wait in that case.
*/ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{ struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; struct radeon_cs_packet p3reloc, waitreloc; int crtc_id; int r;
uint32_t header, h_idx, reg; volatile uint32_t *ib;
ib = p->ib.ptr;
/* parse the wait until */
r = radeon_cs_packet_parse(p, &waitreloc, p->idx); if (r) return r;
/* check its a wait until and only 1 count */ if (waitreloc.reg != RADEON_WAIT_UNTIL ||
waitreloc.count != 0) {
DRM_ERROR("vline wait had illegal wait until segment\n"); return -EINVAL;
}
if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
DRM_ERROR("vline wait had illegal wait until\n"); return -EINVAL;
}
/* jump over the NOP */
r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2); if (r) return r;
track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
track->immd_dwords = pkt->count - 1;
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; case PACKET3_3D_DRAW_IMMD: if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL;
}
track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
track->immd_dwords = pkt->count - 1;
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_IMMD_2: if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL;
}
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
track->immd_dwords = pkt->count;
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_VBUF_2:
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX_2:
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing using indices to vertex buffer */ case PACKET3_3D_DRAW_VBUF:
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing of vertex buffers setup elsewhere */ case PACKET3_3D_DRAW_INDX:
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
r = r100_cs_track_check(p->rdev, track); if (r) return r; break; /* triggers drawing using indices to vertex buffer */ case PACKET3_3D_CLEAR_HIZ: case PACKET3_3D_CLEAR_ZMASK: if (p->rdev->hyperz_filp != p->filp) return -EINVAL; break; case PACKET3_NOP: break; default:
DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL;
} return 0;
}
/**
 * r100_cs_parse() - check every packet of a command stream
 * @p: parser structure holding parsing context
 *
 * Allocates a zeroed tracker, clears it and stores it in p->track, then
 * decodes packets starting at p->idx until p->idx reaches
 * p->chunk_ib->length_dw.  Type 0 packets go through
 * r100_cs_parse_packet0() with the r200 or r100 register checker depending
 * on the chip family, type 2 packets are accepted as is and type 3 packets
 * are handed to r100_packet3_check().
 *
 * The tracker is not freed here on any path; it stays reachable through
 * p->track (presumably released by the parser teardown - confirm).
 *
 * Return: 0 on success, -ENOMEM if the tracker cannot be allocated, -EINVAL
 * for an unknown packet type, or the first error reported by a helper.
 */
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (!track)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	p->track = track;

	for (;;) {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;

		/* Advance past the header and the count + 1 payload dwords. */
		p->idx += pkt.count + 2;

		if (pkt.type == RADEON_PACKET_TYPE0) {
			r = r100_cs_parse_packet0(p, &pkt,
						  p->rdev->config.r100.reg_safe_bm,
						  p->rdev->config.r100.reg_safe_bm_size,
						  (p->rdev->family >= CHIP_R200) ?
						  &r200_packet0_check :
						  &r100_packet0_check);
		} else if (pkt.type == RADEON_PACKET_TYPE2) {
			/* nothing to verify */
		} else if (pkt.type == RADEON_PACKET_TYPE3) {
			r = r100_packet3_check(p, &pkt);
		} else {
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r)
			return r;

		if (p->idx >= p->chunk_ib->length_dw)
			break;
	}
	return 0;
}
/**
 * r100_cs_track_texture_print() - dump the tracked state of one texture
 * @t: tracked texture state to print
 *
 * Emits every field of @t through DRM_ERROR, one field per message.
 */
static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("width_11                   %d\n", t->width_11);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("height_11                  %d\n", t->height_11);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
	DRM_ERROR("compress format            %d\n", t->compress_format);
}
staticint r100_track_compress_size(int compress_format, int w, int h)
{ int block_width, block_height, block_bytes; int wblocks, hblocks; int min_wblocks; int sz;
for (face = 0; face < 5; face++) {
cube_robj = track->textures[idx].cube_info[face].robj;
w = track->textures[idx].cube_info[face].width;
h = track->textures[idx].cube_info[face].height;
if (compress_format) {
size = r100_track_compress_size(compress_format, w, h);
} else
size = w * h;
size *= track->textures[idx].cpp;
/**
 * r100_cs_track_texture_check() - check that bound textures fit their buffers
 * @rdev: radeon_device pointer
 * @track: tracked command stream state
 *
 * For every texture unit that is enabled and does not have lookup_disable
 * set, sums the size of levels 0 .. num_levels, scales it by cpp and compares
 * the result with the size of the buffer object bound to the unit.
 * tex_coord_type 2 either delegates to r100_cs_track_cube() (when
 * track->separate_cube is set) or multiplies the size by 6.
 *
 * Return: 0 if every checked texture fits, -EINVAL if a unit has no buffer
 * bound, uses an unknown tex_coord_type or needs more bytes than its buffer
 * holds, or the error returned by r100_cs_track_cube().
 */
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_bo *robj;
	unsigned long size;
	unsigned u, i, w, h, d;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		if (track->textures[u].lookup_disable)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			/* Every dimension is divided by 2^i for level i. */
			if (track->textures[u].use_pitch) {
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			if (track->textures[u].tex_coord_type == 1) {
				d = (1 << track->textures[u].txdepth) / (1 << i);
				if (!d)
					d = 1;
			} else {
				d = 1;
			}
			/*
			 * Multiply in unsigned long so that, where it is wider
			 * than unsigned, the per-level product cannot wrap
			 * before it reaches the bound check below.
			 */
			if (track->textures[u].compress_format) {
				/* compressed textures are block based */
				size += (unsigned long)(unsigned)r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
			} else {
				size += (unsigned long)w * h * d;
			}
		}
		size *= track->textures[u].cpp;

		switch (track->textures[u].tex_coord_type) {
		case 0:
		case 1:
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_bo_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_bo_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}
for (i = 0; i < num_cb; i++) { if (track->cb[i].robj == NULL) {
DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); return -EINVAL;
}
size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
size += track->cb[i].offset; if (size > radeon_bo_size(track->cb[i].robj)) {
DRM_ERROR("[drm] Buffer too small for color buffer %d " "(need %lu have %lu) !\n", i, size,
radeon_bo_size(track->cb[i].robj));
DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
i, track->cb[i].pitch, track->cb[i].cpp,
track->cb[i].offset, track->maxy); return -EINVAL;
}
}
track->cb_dirty = false;
if (track->zb_dirty && track->z_enabled) { if (track->zb.robj == NULL) {
DRM_ERROR("[drm] No buffer for z buffer !\n"); return -EINVAL;
}
size = track->zb.pitch * track->zb.cpp * track->maxy;
size += track->zb.offset; if (size > radeon_bo_size(track->zb.robj)) {
DRM_ERROR("[drm] Buffer too small for z buffer " "(need %lu have %lu) !\n", size,
radeon_bo_size(track->zb.robj));
DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
track->zb.pitch, track->zb.cpp,
track->zb.offset, track->maxy); return -EINVAL;
}
}
track->zb_dirty = false;
if (track->aa_dirty && track->aaresolve) { if (track->aa.robj == NULL) {
DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); return -EINVAL;
} /* I believe the format comes from colorbuffer0. */
size = track->aa.pitch * track->cb[0].cpp * track->maxy;
size += track->aa.offset; if (size > radeon_bo_size(track->aa.robj)) {
DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " "(need %lu have %lu) !\n", i, size,
radeon_bo_size(track->aa.robj));
DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
i, track->aa.pitch, track->cb[0].cpp,
track->aa.offset, track->maxy); return -EINVAL;
}
}
track->aa_dirty = false;
prim_walk = (track->vap_vf_cntl >> 4) & 0x3; if (track->vap_vf_cntl & (1 << 14)) {
nverts = track->vap_alt_nverts;
} else {
nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
} switch (prim_walk) { case 1: for (i = 0; i < track->num_arrays; i++) {
size = track->arrays[i].esize * track->max_indx * 4UL; if (track->arrays[i].robj == NULL) {
DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); return -EINVAL;
} if (size > radeon_bo_size(track->arrays[i].robj)) {
dev_err(rdev->dev, "(PW %u) Vertex array %u " "need %lu dwords have %lu dwords\n",
prim_walk, i, size >> 2,
radeon_bo_size(track->arrays[i].robj)
>> 2);
DRM_ERROR("Max indices %u\n", track->max_indx); return -EINVAL;
}
} break; case 2: for (i = 0; i < track->num_arrays; i++) {
size = track->arrays[i].esize * (nverts - 1) * 4UL; if (track->arrays[i].robj == NULL) {
DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); return -EINVAL;
} if (size > radeon_bo_size(track->arrays[i].robj)) {
dev_err(rdev->dev, "(PW %u) Vertex array %u " "need %lu dwords have %lu dwords\n",
prim_walk, i, size >> 2,
radeon_bo_size(track->arrays[i].robj)
>> 2); return -EINVAL;
}
} break; case 3:
size = track->vtx_size * nverts; if (size != track->immd_dwords) {
DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
track->immd_dwords, size);
DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
nverts, track->vtx_size); return -EINVAL;
} break; default:
DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
prim_walk); return -EINVAL;
}
/* set these so they don't interfere with anything */
WREG32(RADEON_OV0_SCALE_CNTL, 0);
WREG32(RADEON_SUBPIC_CNTL, 0);
WREG32(RADEON_VIPH_CONTROL, 0);
WREG32(RADEON_I2C_CNTL_1, 0);
WREG32(RADEON_DVI_I2C_CNTL_1, 0);
WREG32(RADEON_CAP0_TRIG_CNTL, 0);
WREG32(RADEON_CAP1_TRIG_CNTL, 0);
/* always set up dac2 on rn50 and some rv100 as lots * of servers seem to wire it up to a VGA port but * don't report it in the bios connector * table.
*/ switch (rdev->pdev->device) { /* RN50 */ case 0x515e: case 0x5969:
force_dac2 = true; break; /* RV100*/ case 0x5159: case 0x515a: /* DELL triple head servers */ if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
((rdev->pdev->subsystem_device == 0x016c) ||
(rdev->pdev->subsystem_device == 0x016d) ||
(rdev->pdev->subsystem_device == 0x016e) ||
(rdev->pdev->subsystem_device == 0x016f) ||
(rdev->pdev->subsystem_device == 0x0170) ||
(rdev->pdev->subsystem_device == 0x017d) ||
(rdev->pdev->subsystem_device == 0x017e) ||
(rdev->pdev->subsystem_device == 0x0183) ||
(rdev->pdev->subsystem_device == 0x018a) ||
(rdev->pdev->subsystem_device == 0x019a)))
force_dac2 = true; break;
}
/* Set HDP_APER_CNTL only on cards that are known not to be broken, * that is has the 2nd generation multifunction PCI interface
*/ if (rdev->family == CHIP_RV280 ||
rdev->family >= CHIP_RV350) {
WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
~RADEON_HDP_APER_CNTL);
DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); return aper_size * 2;
}
/* Older cards have all sorts of funny issues to deal with. First * check if it's a multifunction card by reading the PCI config * header type... Limit those to one aperture size
*/
pci_read_config_byte(rdev->pdev, 0xe, &byte); if (byte & 0x80) {
DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
DRM_INFO("Limiting VRAM to one aperture\n"); return aper_size;
}
/* Single function older card. We read HDP_APER_CNTL to see how the BIOS * have set it up. We don't write this as it's broken on some ASICs but * we expect the BIOS to have done the right thing (might be too optimistic...)
*/ if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) return aper_size * 2; return aper_size;
}
/* work out accessible VRAM */
rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); /* FIXME we don't use the second aperture yet when we could use it */ if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
rdev->mc.visible_vram_size = rdev->mc.aper_size;
config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); if (rdev->flags & RADEON_IS_IGP) {
uint32_t tom; /* read NB_TOM to get the amount of ram stolen for the GPU */
tom = RREG32(RADEON_NB_TOM);
rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
} else {
rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); /* Some production boards of m6 will report 0 * if it's 8 MB
*/ if (rdev->mc.real_vram_size == 0) {
rdev->mc.real_vram_size = 8192 * 1024;
WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
} /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - * Novell bug 204882 + along with lots of ubuntu ones
*/ if (rdev->mc.aper_size > config_aper_size)
config_aper_size = rdev->mc.aper_size;
staticvoid r100_pll_errata_after_data(struct radeon_device *rdev)
{ /* This workarounds is necessary on RV100, RS100 and RS200 chips * or the chip could hang on a subsequent access
*/ if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
mdelay(5);
}
/* This function is required to workaround a hardware bug in some (all?) * revisions of the R300. This workaround should be called after every * CLOCK_CNTL_INDEX register access. If not, register reads afterward * may not be correct.
*/ if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
uint32_t save, tmp;
save = RREG32(RADEON_CLOCK_CNTL_INDEX);
tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
WREG32(RADEON_CLOCK_CNTL_INDEX, save);
}
}
mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); if (peak_disp_bw.full >= mem_bw.full) {
DRM_ERROR("You may not have enough display bandwidth for current mode\n" "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
}
/* setup Max GRPH_STOP_REQ default value */ if (ASIC_IS_RV100(rdev))
max_stop_req = 0x5c; else
max_stop_req = 0x7c;
if (mode1) { /* CRTC1 Set GRPH_BUFFER_CNTL register using h/w defined optimal values. GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
*/
stop_req = mode1->hdisplay * pixel_bytes1 / 16;
if (stop_req > max_stop_req)
stop_req = max_stop_req;
/* Find the drain rate of the display buffer.
*/
temp_ff.full = dfixed_const((16/pixel_bytes1));
disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
/* Find the critical point of the display buffer.
*/
crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
crit_point_ff.full += dfixed_const_half(0);
critical_point = dfixed_trunc(crit_point_ff);
if (rdev->disp_priority == 2) {
critical_point = 0;
}
/* The critical point should never be above max_stop_req-4. Setting GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
*/ if (max_stop_req - critical_point < 4)
critical_point = 0;
if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
critical_point = 0x10;
}
DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
(unsignedint)RREG32(RADEON_GRPH2_BUFFER_CNTL));
}
/* Save number of lines the linebuffer leads before the scanout */ if (mode1)
rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
if (mode2)
rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
}
/**
 * r100_ring_test() - check that the CP executes commands from the ring
 * @rdev: radeon_device pointer
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a PACKET0 on @ring that
 * writes 0xDEADBEEF to it, and polls the register, 1 us apart, for up to
 * rdev->usec_timeout iterations.  The scratch register is released on every
 * path after it has been obtained.
 *
 * Return: 0 if the value showed up in time, -EINVAL if it did not, or the
 * error returned by radeon_scratch_get() / radeon_ring_lock().
 */
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* two dwords: the PACKET0 header and the value to write */
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i < rdev->usec_timeout) {
		/* i is unsigned, so print it with %u */
		DRM_INFO("ring test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
r = radeon_scratch_get(rdev, &scratch); if (r) {
DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256); if (r) {
DRM_ERROR("radeon: failed to get ib (%d).\n", r); goto free_scratch;
}
ib.ptr[0] = PACKET0(scratch, 0);
ib.ptr[1] = 0xDEADBEEF;
ib.ptr[2] = PACKET2(0);
ib.ptr[3] = PACKET2(0);
ib.ptr[4] = PACKET2(0);
ib.ptr[5] = PACKET2(0);
ib.ptr[6] = PACKET2(0);
ib.ptr[7] = PACKET2(0);
ib.length_dw = 8;
r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) {
DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib;
}
r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
RADEON_USEC_IB_TEST_TIMEOUT)); if (r < 0) {
DRM_ERROR("radeon: fence wait failed (%d).\n", r); goto free_ib;
} elseif (r == 0) {
DRM_ERROR("radeon: fence wait timed out.\n");
r = -ETIMEDOUT; goto free_ib;
}
r = 0; for (i = 0; i < rdev->usec_timeout; i++) {
tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { break;
}
udelay(1);
} if (i < rdev->usec_timeout) {
DRM_INFO("ib test succeeded in %u usecs\n", i);
} else {
DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
r = -EINVAL;
}
free_ib:
radeon_ib_free(rdev, &ib);
free_scratch:
radeon_scratch_free(rdev, scratch); return r;
}
/**
 * r100_mc_stop - stop the CP and save CRTC/cursor registers
 *
 * @rdev: radeon_device pointer
 * @save: destination for the register values read below
 *
 * Marks the GFX ring as not ready, writes 0 to CP_CSQ_CNTL and stores
 * GENMO_WT, CRTC_EXT_CNTL, CRTC_GEN_CNTL and CUR_OFFSET into @save; the
 * CRTC2/CUR2 values are stored as well unless RADEON_SINGLE_CRTC is set.
 *
 * NOTE(review): the trailing clock-gating / SCLK_CNTL statements use `tmp`,
 * which is not declared anywhere in this block. It looks like lines were
 * lost between this function and a later one - confirm against the
 * reference copy of the file before relying on this text.
 */
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{ /* Shut down the CP: we should not need to do this, but better safe
   * than sorry.
*/
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
WREG32(R_000740_CP_CSQ_CNTL, 0);
/* Save a few CRTC registers (second CRTC only when the chip has one) */
save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET); if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
}
/* Enable legacy clock gating when radeon_dynclks is neither -1 nor 0 */
if (radeon_dynclks != -1 && radeon_dynclks)
radeon_legacy_set_clock_gating(rdev, 1); /* We need to force on some of the blocks */
/* Read-modify-write of SCLK_CNTL: always set FORCE_CP and FORCE_VIP */
tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1); if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
/* RV250/RV280 additionally get FORCE_DISP1 and FORCE_DISP2 */
tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}
/**
 * r100_startup - bring the hardware blocks up in order
 *
 * @rdev: radeon_device pointer
 *
 * Programs the common registers, the memory controller and the clocks,
 * enables bus mastering and (on PCI parts) the PCI GART, then sets up
 * writeback, the GFX ring fences, interrupts, the CP and the IB pool.
 *
 * Returns 0 on success, or the error code of the first step that failed.
 * Nothing is torn down here on failure.
 */
static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/*
	 * Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM)
	 */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ; the handler is only installed once */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r100_irq_set(rdev);
	/* Remember the current HOST_PATH_CNTL value in the asic config */
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);

	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
/**
 * r100_resume - re-post the card and restart the hardware
 *
 * @rdev: radeon_device pointer
 *
 * Disables the PCI GART (PCI parts only), restarts the clocks, resets the
 * GPU, re-runs the COMBIOS asic init, restores the surface registers and
 * finally calls r100_startup().
 *
 * Returns the result of r100_startup(); acceleration is flagged as not
 * working when that result is non-zero.
 */
int r100_resume(struct radeon_device *rdev)
{
	int ret;

	/* Make sure the GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);

	/* Resume clock before doing reset */
	r100_clock_startup(rdev);

	/*
	 * Reset gpu before posting otherwise ATOM will enter infinite loop.
	 * A failed reset is only reported, it does not abort the resume.
	 */
	if (radeon_asic_reset(rdev))
		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			 RREG32(R_000E40_RBBM_STATUS),
			 RREG32(R_0007C0_CP_STAT));

	/* post */
	radeon_combios_asic_init(rdev_to_drm(rdev));

	/* Resume clock after posting */
	r100_clock_startup(rdev);

	/* Initialize surface registers */
	radeon_surface_init(rdev);

	/* Flag acceleration as working while startup runs, clear it on failure */
	rdev->accel_working = true;
	ret = r100_startup(rdev);
	if (ret != 0)
		rdev->accel_working = false;

	return ret;
}
/**
 * r100_suspend - quiesce the hardware before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Suspends power management, then disables the CP, writeback and
 * interrupts, and finally the PCI GART on PCI parts.
 *
 * Always returns 0.
 */
int r100_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);

	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);

	/* Only PCI parts use the PCI GART */
	if ((rdev->flags & RADEON_IS_PCI) != 0) {
		r100_pci_gart_disable(rdev);
	}

	return 0;
}
/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However doing our init sequence with the CP and
 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 *
 * Each of CP_CSQ_CNTL, CP_RB_CNTL and SCRATCH_UMSK is read once and is
 * written back as 0 only when it currently holds a non-zero value.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	if (RREG32(RADEON_CP_CSQ_CNTL) != 0)
		WREG32(RADEON_CP_CSQ_CNTL, 0);

	if (RREG32(RADEON_CP_RB_CNTL) != 0)
		WREG32(RADEON_CP_RB_CNTL, 0);

	if (RREG32(RADEON_SCRATCH_UMSK) != 0)
		WREG32(RADEON_SCRATCH_UMSK, 0);
}
int r100_init(struct radeon_device *rdev)
{ int r;
/* Register debugfs file specific to this group of asics */
r100_debugfs_mc_info_init(rdev); /* Disable VGA */
r100_vga_render_disable(rdev); /* Initialize scratch registers */
radeon_scratch_init(rdev); /* Initialize surface registers */
radeon_surface_init(rdev); /* sanity check some register to avoid hangs like after kexec */
r100_restore_sanity(rdev); /* TODO: disable VGA need to use VGA request */ /* BIOS*/ if (!radeon_get_bios(rdev)) { if (ASIC_IS_AVIVO(rdev)) return -EINVAL;
} if (rdev->is_atom_bios) {
dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n"); return -EINVAL;
} else {
r = radeon_combios_init(rdev); if (r) return r;
} /* Reset gpu before posting otherwise ATOM will enter infinite loop */ if (radeon_asic_reset(rdev)) {
dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
} /* check if cards are posted or not */ if (radeon_boot_test_post_card(rdev) == false) return -EINVAL; /* Set asic errata */
r100_errata(rdev); /* Initialize clocks */
radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) {
r = radeon_agp_init(rdev); if (r) {
radeon_agp_disable(rdev);
}
} /* initialize VRAM */
r100_mc_init(rdev); /* Fence driver */
radeon_fence_driver_init(rdev); /* Memory manager */
r = radeon_bo_init(rdev); if (r) return r; if (rdev->flags & RADEON_IS_PCI) {
r = r100_pci_gart_init(rdev); if (r) return r;
}
r100_set_safe_registers(rdev);
/* Initialize power management */
radeon_pm_init(rdev);
rdev->accel_working = true;
r = r100_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */
dev_err(rdev->dev, "Disabling GPU acceleration\n");
r100_cp_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCI)
r100_pci_gart_fini(rdev);
rdev->accel_working = false;
} return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.