// Copyright 2010 Google Inc. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the COPYING file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Frame-reconstruction function. Memory allocation. // // Author: Skal (pascal.massimino@gmail.com)
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
// for caching, given a filtering level.
// Simple filter:  up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
//                 U/V, so it's 8 samples total (because of the 2x upsampling).
// The table has one entry per filtering level: { none, simple, complex }.
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
//------------------------------------------------------------------------------ // Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
// Fills dec->fstrengths_[segment][i4x4] for every segment and for both values
// of the i4x4 flag (0 and 1). Does nothing when dec->filter_type_ is 0.
//
// For each entry:
//  - the base level comes from the segment header (optionally offset by the
//    frame-level hdr->level_) or directly from hdr->level_,
//  - the loop-filter deltas are added if enabled, and the result is clamped
//    to [0, 63],
//  - a level of 0 sets f_limit_ to 0 ("no filtering") and leaves f_ilevel_
//    and hev_thresh_ untouched,
//  - f_inner_ is set to the i4x4 flag.
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
  if (dec->filter_type_ > 0) {
    int s;
    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
      int i4x4;
      // First, compute the initial level
      int base_level;
      if (dec->segment_hdr_.use_segment_) {
        base_level = dec->segment_hdr_.filter_strength_[s];
        if (!dec->segment_hdr_.absolute_delta_) {
          // Segment value is relative to the frame-level strength.
          base_level += hdr->level_;
        }
      } else {
        base_level = hdr->level_;
      }
      for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
        VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
        int level = base_level;
        if (hdr->use_lf_delta_) {
          level += hdr->ref_lf_delta_[0];
          if (i4x4) {
            level += hdr->mode_lf_delta_[0];
          }
        }
        // Clamp to the valid strength range.
        level = (level < 0) ? 0 : (level > 63) ? 63 : level;
        if (level > 0) {
          int ilevel = level;
          if (hdr->sharpness_ > 0) {
            // Higher sharpness lowers the interior limit.
            if (hdr->sharpness_ > 4) {
              ilevel >>= 2;
            } else {
              ilevel >>= 1;
            }
            if (ilevel > 9 - hdr->sharpness_) {
              ilevel = 9 - hdr->sharpness_;
            }
          }
          if (ilevel < 1) ilevel = 1;
          info->f_ilevel_ = ilevel;
          info->f_limit_ = 2 * level + ilevel;
          info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
        } else {
          info->f_limit_ = 0;  // no filtering
        }
        info->f_inner_ = i4x4;
      }
    }
  }
}
//------------------------------------------------------------------------------ // This function is called after a row of macroblocks is finished decoding. // It also takes into account the following restrictions: // * In case of in-loop filtering, we must hold off sending some of the bottom // pixels as they are yet unfiltered. They will be when the next macroblock // row is decoded. Meanwhile, we must preserve them by rotating them in the // cache area. This doesn't hold for the very bottom row of the uncropped // picture of course. // * we must clip the remaining pixels against the cropping area. The VP8Io // struct must have the following fields set correctly before calling put():
// Converts a macroblock row index 'mb_y' into a vertical position by
// multiplying it by 16.
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
// Processes macroblock row dec->mb_y_ once it is finished decoding.
//
// Single-threaded (dec->mt_method_ == 0): the row is reconstructed and passed
// to FinishRow() right away.
// Multi-threaded: the previous worker job is synchronized first, then the
// thread context is updated and a new job is launched. With mt_method_ == 2
// the macroblock data is swapped into the context instead of being
// reconstructed in the calling thread.
//
// Returns the result of FinishRow() or of the worker's Sync(); a zero value
// means failure and, in the multi-threaded case, no new job is launched.
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  int ok = 1;
  VP8ThreadContext* const ctx = &dec->thread_ctx_;
  // Filtering applies only when enabled and when the row lies within the
  // [tl_mb_y_, br_mb_y_] range.
  const int filter_row =
      (dec->filter_type_ > 0) &&
      (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
  if (dec->mt_method_ == 0) {
    // ctx->id_ and ctx->f_info_ are already set
    ctx->mb_y_ = dec->mb_y_;
    ctx->filter_row_ = filter_row;
    ReconstructRow(dec, ctx);
    ok = FinishRow(dec, io);
  } else {
    WebPWorker* const worker = &dec->worker_;
    // Finish previous job *before* updating context
    ok &= WebPGetWorkerInterface()->Sync(worker);
    assert(worker->status_ == OK);
    if (ok) {   // spawn a new deblocking/output job
      ctx->io_ = *io;
      ctx->id_ = dec->cache_id_;
      ctx->mb_y_ = dec->mb_y_;
      ctx->filter_row_ = filter_row;
      if (dec->mt_method_ == 2) {  // swap macroblock data
        VP8MBData* const tmp = ctx->mb_data_;
        ctx->mb_data_ = dec->mb_data_;
        dec->mb_data_ = tmp;
      } else {
        // perform reconstruction directly in main thread
        ReconstructRow(dec, ctx);
      }
      if (filter_row) {            // swap filter info
        VP8FInfo* const tmp = ctx->f_info_;
        ctx->f_info_ = dec->f_info_;
        dec->f_info_ = tmp;
      }
      // (reconstruct)+filter in parallel
      WebPGetWorkerInterface()->Launch(worker);
      // Advance to the next cache slot, wrapping around.
      if (++dec->cache_id_ == dec->num_caches_) {
        dec->cache_id_ = 0;
      }
    }
  }
  return ok;
}
//------------------------------------------------------------------------------ // Finish setting up the decoding parameter once user's setup() is called.
// Finishes setting up the decoding parameters once the user's setup() hook
// has been called:
//  - calls io->setup() if set; on failure records VP8_STATUS_USER_ABORT and
//    returns dec->status_,
//  - turns filtering off when io->bypass_filtering is set,
//  - computes tl_mb_x_/tl_mb_y_ and br_mb_x_/br_mb_y_, the macroblock bounds
//    of the area where in-loop filtering is required given the cropping,
//  - precomputes the per-segment filter strengths.
// Returns VP8_STATUS_OK on success.
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
  // Call setup() first. This may trigger additional decoding features on 'io'.
  // Note: Afterward, we must call teardown() no matter what.
  if (io->setup != NULL && !io->setup(io)) {
    VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
    return dec->status_;
  }

  // Disable filtering per user request
  if (io->bypass_filtering) {
    dec->filter_type_ = 0;
  }

  // Define the area where we can skip in-loop filtering, in case of cropping.
  //
  // 'Simple' filter reads two luma samples outside of the macroblock
  // and filters one. It doesn't filter the chroma samples. Hence, we can
  // avoid doing the in-loop filtering before crop_top/crop_left position.
  // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
  // Means: there's a dependency chain that goes all the way up to the
  // top-left corner of the picture (MB #0). We must filter all the previous
  // macroblocks.
  {
    const int extra_pixels = kFilterExtraRows[dec->filter_type_];
    if (dec->filter_type_ == 2) {
      // For complex filter, we need to preserve the dependency chain.
      dec->tl_mb_x_ = 0;
      dec->tl_mb_y_ = 0;
    } else {
      // For simple filter, we can filter only the cropped region.
      // We include 'extra_pixels' on the other side of the boundary, since
      // vertical or horizontal filtering of the previous macroblock can
      // modify some abutting pixels.
      // The offsets are clamped to zero *before* shifting: right-shifting a
      // negative signed value is implementation-defined in C.
      const int left = io->crop_left - extra_pixels;
      const int top = io->crop_top - extra_pixels;
      dec->tl_mb_x_ = (left > 0) ? (left >> 4) : 0;
      dec->tl_mb_y_ = (top > 0) ? (top >> 4) : 0;
    }
    // We need some 'extra' pixels on the right/bottom.
    dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
    dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
    if (dec->br_mb_x_ > dec->mb_w_) {
      dec->br_mb_x_ = dec->mb_w_;
    }
    if (dec->br_mb_y_ > dec->mb_h_) {
      dec->br_mb_y_ = dec->mb_h_;
    }
  }
  PrecomputeFilterStrengths(dec);
  return VP8_STATUS_OK;
}
// Counterpart of VP8EnterCritical(): synchronizes the worker when
// multi-threading is in use (dec->mt_method_ > 0), then invokes the user's
// teardown() hook if one is set.
// Returns the worker's Sync() result in the multi-threaded case, 1 otherwise.
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
  // Without a worker there is nothing to wait for, so the status stays at 1.
  const int status =
      (dec->mt_method_ > 0) ? WebPGetWorkerInterface()->Sync(&dec->worker_)
                            : 1;

  // teardown() runs regardless of the synchronization result.
  if (io->teardown != NULL) io->teardown(io);
  return status;
}
//------------------------------------------------------------------------------
// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
//
// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
// immediately, and needs to wait for first few rows of the next macroblock to
// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
// on strength).
// With two threads, the vertical positions of the rows being decoded are:
// Decode:  [ 0..15][16..31][32..47][48..63][64..79][...
// Deblock:         [ 0..11][12..27][28..43][44..59][...
// If we use two threads and two caches of 16 pixels, the sequence would be:
// Decode:  [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
// Deblock:         [ 0..11][12..27!!][-4..11][12..27][...
// The problem occurs during row [12..15!!] that both the decoding and
// deblocking threads are writing simultaneously.
// With 3 cache lines, one gets a safe write pattern:
// Decode:  [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
// Deblock:         [ 0..11][12..27][28..43][-4..11][12..27][28...
// Note that multi-threaded output _without_ deblocking can make use of two
// cache lines of 16 pixels only, since there's no lagging behind. The decoding
// and output process have non-concurrent writing:
// Decode:  [ 0..15][16..31][ 0..15][16..31][...
// io->put:         [ 0..15][16..31][ 0..15][...
// Number of 16-pixel cache rows used as delay line.
#define MT_CACHE_LINES 3   // multi-threaded case
#define ST_CACHE_LINES 1   // 1 cache row only for single-threaded case
if (!CheckSizeOverflow(needed)) return 0; // check for overflow if (needed > dec->mem_size_) {
WebPSafeFree(dec->mem_);
dec->mem_size_ = 0;
dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); if (dec->mem_ == NULL) { return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, "no memory during frame initialization.");
} // down-cast is ok, thanks to WebPSafeMalloc() above.
dec->mem_size_ = (size_t)needed;
}
mem = (uint8_t*)dec->mem_;
dec->intra_t_ = mem;
mem += intra_pred_mode_size;
dec->yuv_t_ = (VP8TopSamples*)mem;
mem += top_size;
dec->mb_info_ = ((VP8MB*)mem) + 1;
mem += mb_info_size;
dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
mem += f_info_size;
dec->thread_ctx_.id_ = 0;
dec->thread_ctx_.f_info_ = dec->f_info_; if (dec->filter_type_ > 0 && dec->mt_method_ > 0) { // secondary cache line. The deblocking process need to make use of the // filtering strength from previous macroblock row, while the new ones // are being decoded in parallel. We'll just swap the pointers.
dec->thread_ctx_.f_info_ += mb_w;
}
mem = (uint8_t*)WEBP_ALIGN(mem);
assert((yuv_size & WEBP_ALIGN_CST) == 0);
dec->yuv_b_ = mem;
mem += yuv_size;
dec->mb_data_ = (VP8MBData*)mem;
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; if (dec->mt_method_ == 2) {
dec->thread_ctx_.mb_data_ += mb_w;
}
mem += mb_data_size;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.