/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned // at precision of DIV_LUT_PREC_BITS along with the shift. static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
int64_t f;
*shift = (int16_t)((D >> 32) ? get_msb((unsignedint)(D >> 32)) + 32
: get_msb((unsignedint)D)); // e is obtained from D after resetting the most significant 1 bit. const int64_t e = D - ((uint64_t)1 << *shift); // Get the most significant DIV_LUT_BITS (8) bits of e into f if (*shift > DIV_LUT_BITS)
f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS); else
f = e << (DIV_LUT_BITS - *shift);
assert(f <= DIV_LUT_NUM);
*shift += DIV_LUT_PREC_BITS; // Use f as lookup into the precomputed table of multipliers return div_lut[f];
}
static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
int32_t f;
*shift = get_msb(D); // e is obtained from D after resetting the most significant 1 bit. const int32_t e = D - ((uint32_t)1 << *shift); // Get the most significant DIV_LUT_BITS (8) bits of e into f if (*shift > DIV_LUT_BITS)
f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS); else
f = e << (DIV_LUT_BITS - *shift);
assert(f <= DIV_LUT_NUM);
*shift += DIV_LUT_PREC_BITS; // Use f as lookup into the precomputed table of multipliers return div_lut[f];
}
#ifndef NDEBUG // Check that the given warp model satisfies the relevant constraints for // its stated model type staticvoid check_model_consistency(WarpedMotionParams *wm) { switch (wm->wmtype) { case IDENTITY:
assert(wm->wmmat[0] == 0);
assert(wm->wmmat[1] == 0);
AOM_FALLTHROUGH_INTENDED; case TRANSLATION:
assert(wm->wmmat[2] == 1 << WARPEDMODEL_PREC_BITS);
assert(wm->wmmat[3] == 0);
AOM_FALLTHROUGH_INTENDED; case ROTZOOM:
assert(wm->wmmat[4] == -wm->wmmat[3]);
assert(wm->wmmat[5] == wm->wmmat[2]);
AOM_FALLTHROUGH_INTENDED; case AFFINE: break; default: assert(0 && "Bad wmtype");
}
} #endif// NDEBUG
// Returns 1 on success or 0 on an invalid affine set int av1_get_shear_params(WarpedMotionParams *wm) { #ifndef NDEBUG // Check that models have been constructed sensibly // This is a good place to check, because this function does not need to // be called until after model construction is complete, but must be called // before the model can be used for prediction.
check_model_consistency(wm); #endif// NDEBUG
const int32_t *mat = wm->wmmat; if (!is_affine_valid(wm)) return 0;
if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta)) return 0;
return 1;
}
/* NOTE(review): this definition of av1_highbd_warp_affine_c is TRUNCATED in
 * this chunk — the body below stops right after the 8x8 block-centre
 * projection; the horizontal/vertical filter loops and the closing braces
 * are missing. The text also contains fused tokens ("constint") and line
 * collapses where a "//" comment swallows the code that follows it on the
 * same line. Restore this function from upstream libaom rather than editing
 * it in place. */
#if CONFIG_AV1_HIGHBITDEPTH /* Note: For an explanation of the warp algorithm, and some notes on bit widths for hardware implementations, see the comments above av1_warp_affine_c
*/ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd,
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8]; constint reduce_bits_horiz = conv_params->round_0; constint reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; constint max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; constint offset_bits_horiz = bd + FILTER_BITS - 1; constint offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; constint round_bits =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; constint offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
(void)max_bits_horiz;
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
// Check that, even with 12-bit input, the intermediate values will fit // into an unsigned 16-bit intermediate array.
assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
for (int i = p_row; i < p_row + p_height; i += 8) { for (int j = p_col; j < p_col + p_width; j += 8) { // Calculate the center of this 8x8 block, // project to luma coordinates (if in a subsampled chroma plane), // apply the affine transformation, // then convert back to the original coordinates (if necessary) const int32_t src_x = (j + 4) << subsampling_x; const int32_t src_y = (i + 4) << subsampling_y; const int64_t dst_x =
(int64_t)mat[2] * src_x + (int64_t)mat[3] * src_y + (int64_t)mat[0]; const int64_t dst_y =
(int64_t)mat[4] * src_x + (int64_t)mat[5] * src_y + (int64_t)mat[1]; const int64_t x4 = dst_x >> subsampling_x; const int64_t y4 = dst_y >> subsampling_y;
/* The warp filter for ROTZOOM and AFFINE models works as follows: * Split the input into 8x8 blocks * For each block, project the point (4, 4) within the block, to get the overall block position. Split into integer and fractional coordinates, maintaining full WARPEDMODEL precision * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a variable horizontal offset. This means that, while the rows of the intermediate buffer align with the rows of the *reference* image, the columns align with the columns of the *destination* image. * Filter vertically: Generate the output block (up to 8x8 pixels, but if the destination is too small we crop the output at this stage). Each pixel has a variable vertical offset, so that the resulting rows are aligned with the rows of the destination image.
To accomplish these alignments, we factor the warp matrix as a product of two shear / asymmetric zoom matrices: / a b \ = / 1 0 \ * / 1+alpha beta \ \ c d / \ gamma 1+delta / \ 0 1 / where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively. The horizontal shear (with alpha and beta) is applied first, then the vertical shear (with gamma and delta) is applied second.
The only limitation is that, to fit this in a fixed 8-tap filter size, the fractional pixel offsets must be at most +-1. Since the horizontal filter generates 15 rows of 8 columns, and the initial point we project is at (4, 4) within the block, the parameters must satisfy 4 * |alpha| + 7 * |beta| <= 1 and 4 * |gamma| + 4 * |delta| <= 1 for this filter to be applicable.
Note: This function assumes that the caller has done all of the relevant checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5] are set appropriately (if using a ROTZOOM model), and that alpha, beta, gamma, delta are all in range.
TODO(rachelbarker): Maybe support scaled references?
*/ /* A note on hardware implementation: The warp filter is intended to be implementable using the same hardware as the high-precision convolve filters from the loop-restoration and convolve-round experiments.
For a single filter stage, considering all of the coefficient sets for the warp filter and the regular convolution filter, an input in the range [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)] before rounding.
Allowing for some changes to the filter coefficient sets, call the range [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k, we can replace this by the range [0, 256 * 2^k], which can be stored in an unsigned value with 8 + k bits.
This allows the derivation of the appropriate bit widths and offsets for the various intermediate values: If
F := FILTER_BITS = 7 (or else the above ranges need adjusting) So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit intermediate value. H := ROUND0_BITS V := VERSHEAR_REDUCE_PREC_BITS (and note that we must have H + V = 2*F for the output to have the same scale as the input)
then we end up with the following offsets and ranges: Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a uint{bd + F + 1} After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}. Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a uint{bd + 2*F + 2 - H} After rounding: The final value, before undoing the offset, fits into a uint{bd + 2}.
Then we need to undo the offsets before clamping to a pixel. Note that, if we do this at the end, the amount to subtract is actually independent of H and V:
offset to subtract = (1 << ((bd + F - 1) - H + F - V)) + (1 << ((bd + 2*F - H) - V)) == (1 << (bd - 1)) + (1 << bd)
This allows us to entirely avoid clamping in both the warp filter and the convolve-round experiment. As of the time of writing, the Wiener filter from loop-restoration can encode a central coefficient up to 216, which leads to a maximum value of about 282 * 2^k after applying the offset. So in that case we still need to clamp.
*/ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y,
ConvolveParams *conv_params, int16_t alpha, int16_t beta,
int16_t gamma, int16_t delta) {
/* NOTE(review): this definition is TRUNCATED in this chunk — the body below
 * references ix4/iy4/sx4/sy4 whose derivation from x4/y4 is missing, and the
 * vertical-filter output stage (compound averaging, rounding, pixel clip) and
 * the closing braces are cut off. It also contains fused tokens ("constint")
 * and line collapses where a "//" comment swallows the code after it on the
 * same line (e.g. the "Horizontal filter" and "Vertical filter" loop headers
 * are currently commented out). Restore from upstream libaom rather than
 * editing in place. */
int32_t tmp[15 * 8]; constint bd = 8; constint reduce_bits_horiz = conv_params->round_0; constint reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; constint max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; constint offset_bits_horiz = bd + FILTER_BITS - 1; constint offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; constint round_bits =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; constint offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
(void)max_bits_horiz;
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
for (int i = p_row; i < p_row + p_height; i += 8) { for (int j = p_col; j < p_col + p_width; j += 8) { // Calculate the center of this 8x8 block, // project to luma coordinates (if in a subsampled chroma plane), // apply the affine transformation, // then convert back to the original coordinates (if necessary) const int32_t src_x = (j + 4) << subsampling_x; const int32_t src_y = (i + 4) << subsampling_y; const int64_t dst_x =
(int64_t)mat[2] * src_x + (int64_t)mat[3] * src_y + (int64_t)mat[0]; const int64_t dst_y =
(int64_t)mat[4] * src_x + (int64_t)mat[5] * src_y + (int64_t)mat[1]; const int64_t x4 = dst_x >> subsampling_x; const int64_t y4 = dst_y >> subsampling_y;
// Horizontal filter for (int k = -7; k < 8; ++k) { // Clamp to top/bottom edge of the frame constint iy = clamp(iy4 + k, 0, height - 1);
int sx = sx4 + beta * (k + 4);
for (int l = -4; l < 4; ++l) { int ix = ix4 + l - 3; // At this point, sx = sx4 + alpha * l + beta * k constint offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); const int16_t *coeffs = av1_warped_filter[offs];
int32_t sum = 1 << offset_bits_horiz; for (int m = 0; m < 8; ++m) { // Clamp to left/right edge of the frame constint sample_x = clamp(ix + m, 0, width - 1);
sum += ref[iy * stride + sample_x] * coeffs[m];
}
sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
assert(0 <= sum && sum < (1 << max_bits_horiz));
tmp[(k + 7) * 8 + (l + 4)] = sum;
sx += alpha;
}
}
// Vertical filter for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { int sy = sy4 + delta * (k + 4); for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) { // At this point, sy = sy4 + gamma * l + delta * k constint offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); const int16_t *coeffs = av1_warped_filter[offs];
int32_t sum = 1 << offset_bits_vert; for (int m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
/* NOTE(review): the lines below are an interior FRAGMENT of a least-squares
 * affine-estimation function (upstream: find_affine_int) whose header and
 * earlier body are not present in this chunk — `np`, `pts1`, `pts2`, `A`,
 * `Bx`, `By`, `dux`/`duy`/`sux`/`suy`, `rsux`/`rsuy`, `mvx`/`mvy`,
 * `mi_row`/`mi_col` and `wm` are all declared outside this extract. The
 * huge first line below is entirely swallowed by its leading "//", so the
 * accumulation loop it contains is currently dead text. Restore from
 * upstream libaom rather than editing in place. */
// Assume the center pixel of the block has exactly the same motion vector // as transmitted for the block. First shift the origin of the source // points to the block center, and the origin of the destination points to // the block center added to the motion vector transmitted. // Let (xi, yi) denote the source points and (xi', yi') denote destination // points after origin shifting, for i = 0, 1, 2, .... n-1. // Then if P = [x0, y0, // x1, y1 // x2, y2, // .... // ] // q = [x0', x1', x2', ... ]' // r = [y0', y1', y2', ... ]' // the least squares problems that need to be solved are: // [h1, h2]' = inv(P'P)P'q and // [h3, h4]' = inv(P'P)P'r // where the affine transformation is given by: // x' = h1.x + h2.y // y' = h3.x + h4.y // // The loop below computes: A = P'P, Bx = P'q, By = P'r // We need to just compute inv(A).Bx and inv(A).By for the solutions. // Contribution from neighbor block for (int i = 0; i < np; i++) { constint dx = pts2[i * 2] - dux; constint dy = pts2[i * 2 + 1] - duy; constint sx = pts1[i * 2] - sux; constint sy = pts1[i * 2 + 1] - suy; // (TODO)yunqing: This comparison wouldn't be necessary if the sample // selection is done in find_samples(). Also, global offset can be removed // while collecting samples. if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
A[0][0] += LS_SQUARE(sx);
A[0][1] += LS_PRODUCT1(sx, sy);
A[1][1] += LS_SQUARE(sy);
Bx[0] += LS_PRODUCT2(sx, dx);
Bx[1] += LS_PRODUCT1(sy, dx);
By[0] += LS_PRODUCT1(sx, dy);
By[1] += LS_PRODUCT2(sy, dy);
}
}
constint isuy = (mi_row * MI_SIZE + rsuy); constint isux = (mi_col * MI_SIZE + rsux); // Note: In the vx, vy expressions below, the max value of each of the // 2nd and 3rd terms are (2^16 - 1) * (2^13 - 1). That leaves enough room // for the first term so that the overall sum in the worst case fits // within 32 bits overall. const int32_t vx = mvx * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
(isux * (wm->wmmat[2] - (1 << WARPEDMODEL_PREC_BITS)) +
isuy * wm->wmmat[3]); const int32_t vy = mvy * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
(isux * wm->wmmat[4] +
isuy * (wm->wmmat[5] - (1 << WARPEDMODEL_PREC_BITS)));
wm->wmmat[0] =
clamp(vx, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
wm->wmmat[1] =
clamp(vy, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1); return 0;
}
/* NOTE(review): only the signature and the first assert of
 * av1_find_projection survive in this chunk — the rest of its body is cut
 * off (the text after it is non-code web residue). The parameter list also
 * contains fused "constint" tokens. Restore from upstream libaom. */
int av1_find_projection(int np, constint *pts1, constint *pts2,
BLOCK_SIZE bsize, int mvy, int mvx,
WarpedMotionParams *wm_params, int mi_row, int mi_col) {
// Only the AFFINE model is estimated by this function.
assert(wm_params->wmtype == AFFINE);
/* NOTE(review): the five lines below are extraction residue — a German
 * web-page disclaimer ("the information on this web page was compiled to the
 * best of our knowledge; however neither completeness, correctness nor
 * quality of the provided information is guaranteed. Remark: the colored
 * syntax display and the measurement are still experimental") — and are not
 * part of this source file. Preserved verbatim in a comment so the text
 * cannot be parsed as code; remove entirely when restoring the file.
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
 * Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */