/*
* Copyright © 2018-2021, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>
#include "dav1d/data.h"
#include "common/frame.h"
#include "common/intops.h"
#include "src/ctx.h"
#include "src/decode.h"
#include "src/dequant_tables.h"
#include "src/env.h"
#include "src/filmgrain.h"
#include "src/log.h"
#include "src/qm.h"
#include "src/recon.h"
#include "src/ref.h"
#include "src/tables.h"
#include "src/thread_task.h"
#include "src/warpmv.h"
static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
                              const Dav1dFrameHeader *const frame_hdr,
                              const int qidx, uint16_t (*dq)[3][2])
{
    // Fill the per-segment dequant lookup (plane x dc/ac) from the base
    // quantizer index plus the per-plane deltas in the frame header.
    // Without segmentation only entry 0 is populated.
    const int n_segs = frame_hdr->segmentation.enabled ? 8 : 1;
    for (int seg = 0; seg < n_segs; seg++) {
        // luma AC index; each segment may carry an additional delta_q
        const int yac = frame_hdr->segmentation.enabled ?
            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[seg].delta_q) : qidx;
        const int dc_qidx[3] = {
            iclip_u8(yac + frame_hdr->quant.ydc_delta),
            iclip_u8(yac + frame_hdr->quant.udc_delta),
            iclip_u8(yac + frame_hdr->quant.vdc_delta),
        };
        const int ac_qidx[3] = {
            yac, // luma AC has no extra delta
            iclip_u8(yac + frame_hdr->quant.uac_delta),
            iclip_u8(yac + frame_hdr->quant.vac_delta),
        };
        for (int pl = 0; pl < 3; pl++) {
            dq[seg][pl][0] = dav1d_dq_tbl[seq_hdr->hbd][dc_qidx[pl]][0];
            dq[seg][pl][1] = dav1d_dq_tbl[seq_hdr->hbd][ac_qidx[pl]][1];
        }
    }
}
// Decodes one motion-vector component (x or y) residual from the bitstream.
// mv_prec < 0 means integer-pel only (force_integer_mv), 0 means up to
// quarter-pel, > 0 additionally allows the high-precision (eighth-pel) bit.
// Returns the signed difference in eighth-pel units; a coded component is
// never zero (hence the +1 below).
static int read_mv_component_diff(MsacContext *const msac,
                                  CdfMvComponent *const mv_comp,
                                  const int mv_prec)
{
    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
    // magnitude class; class 0 has its own dedicated CDFs below
    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
    int up, fp = 3, hp = 1; // defaults used when the precision bits are not coded
    if (!cl) {
        // class 0: one integer bit, then optional fractional/high-precision bits
        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
        }
    } else {
        // classes 1..10: cl additional bits on top of the class offset 1 << cl
        up = 1 << cl;
        for (int n = 0; n < cl; n++)
            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
        }
    }
    // reassemble: integer part | 2-bit fraction | high-precision bit, plus 1
    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
    return sign ? -diff : diff;
}
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
                             const int mv_prec)
{
    // Decode the MV joint type, then add the coded residual for each signaled
    // component onto the predictor (vertical first, then horizontal — the
    // decode order is fixed by the bitstream).
    MsacContext *const msac = &ts->msac;
    const enum MVJoint joint =
        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
    if (joint & MV_JOINT_V)
        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
    if (joint & MV_JOINT_H)
        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
}
// Recursively reads the transform-split tree for one maximum-size transform
// unit. Split decisions are recorded one bit per sub-position in
// masks[depth], and the above/left tx-size context arrays are updated at
// the leaves. t->bx/t->by are temporarily advanced while recursing and
// restored before returning.
static void read_tx_tree(Dav1dTaskContext *const t,
                         const enum RectTxfmSize from,
                         const int depth, uint16_t *const masks,
                         const int x_off, const int y_off)
{
    const Dav1dFrameContext *const f = t->f;
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    const int txw = t_dim->lw, txh = t_dim->lh; // log2 tx dimensions
    int is_split;
    if (depth < 2 && from > (int ) TX_4X4) {
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
        // context: whether the above/left neighbour tx sizes are smaller
        const int a = t->a->tx[bx4] < txw;
        const int l = t->l.tx[by4] < txh;
        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
                       t->ts->cdf.m.txpart[cat][a + l]);
        if (is_split)
            masks[depth] |= 1 << (y_off * 4 + x_off);
    } else {
        is_split = 0; // depth limit reached or already at 4x4
    }
    if (is_split && t_dim->max > TX_8X8) {
        // recurse into up to four sub-transforms, skipping those that fall
        // outside the frame; the txw/txh comparisons handle rect shapes
        const enum RectTxfmSize sub = t_dim->sub;
        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
        t->bx += txsw;
        if (txw >= txh && t->bx < f->bw)
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
        t->bx -= txsw;
        t->by += txsh;
        if (txh >= txw && t->by < f->bh) {
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
            t->bx += txsw;
            if (txw >= txh && t->bx < f->bw)
                read_tx_tree(t, sub, depth + 1, masks,
                             x_off * 2 + 1, y_off * 2 + 1);
            t->bx -= txsw;
        }
        t->by -= txsh;
    } else {
        // leaf: record the resulting tx size (TX_4X4 for a split leaf) in
        // the above/left context arrays
        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
    }
}
// Inverse of the AV1 segment-id "neg_interleave" mapping: recovers the
// actual segment id from the coded difference `diff` and the predicted id
// `ref`, given `max` possible ids. Small differences alternate around the
// predictor (odd = above, even = below) while out-of-range codes map to
// the remaining ids.
static int neg_deinterleave(int diff, int ref, int max) {
    if (ref == 0)
        return diff;
    if (ref >= max - 1)
        return max - diff - 1;
    // number of codes that alternate around the predictor before the
    // mapping degenerates to one side
    const int near = (2 * ref < max) ? 2 * ref : 2 * (max - ref - 1);
    if (diff <= near)
        return (diff & 1) ? ref + ((diff + 1) >> 1)
                          : ref - (diff >> 1);
    // past the alternating range: remaining ids in order
    return (2 * ref < max) ? diff : max - (diff + 1);
}
// Scans the above row and left column of neighbouring blocks for blocks
// predicted single-ref from the same reference `ref`. Matches are recorded
// as bit masks for derive_warpmv(): masks[0] = top edge (bit 32 = top-right
// corner), masks[1] = left edge (bit 32 = top-left corner). Each bit's
// position corresponds to the 4px offset of the matching neighbour.
// Collection stops after 8 matches.
static void find_matching_ref(const Dav1dTaskContext *const t,
                              const enum EdgeFlags intra_edge_flags,
                              const int bw4, const int bh4,
                              const int w4, const int h4,
                              const int have_left, const int have_top,
                              const int ref, uint64_t masks[2])
{
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
    int count = 0;
    int have_topleft = have_top && have_left;
    int have_topright = imax(bw4, bh4) < 32 &&
                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
// NB: bs() intentionally stays defined past this function; derive_warpmv()
// below reuses it and #undefs it.
#define bs(rp) dav1d_block_dimensions[(rp)->bs]
#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
    if (have_top) {
        // walk along the row above, one neighbouring block at a time
        const refmvs_block *r2 = &r[-1][t->bx];
        if (matches(r2)) {
            masks[0] |= 1;
            count = 1;
        }
        int aw4 = bs(r2)[0];
        if (aw4 >= bw4) {
            // a single above block covers us; its alignment decides whether
            // the corners still count as separate neighbours
            const int off = t->bx & (aw4 - 1);
            if (off) have_topleft = 0;
            if (aw4 - off > bw4) have_topright = 0;
        } else {
            unsigned mask = 1 << aw4;
            for (int x = aw4; x < w4; x += aw4) {
                r2 += aw4;
                if (matches(r2)) {
                    masks[0] |= mask;
                    if (++count >= 8) return ;
                }
                aw4 = bs(r2)[0];
                mask <<= aw4;
            }
        }
    }
    if (have_left) {
        // walk down the column to the left
        /*const*/ refmvs_block *const *r2 = r;
        if (matches(&r2[0][t->bx - 1])) {
            masks[1] |= 1;
            if (++count >= 8) return ;
        }
        int lh4 = bs(&r2[0][t->bx - 1])[1];
        if (lh4 >= bh4) {
            // a single left block covers us; misalignment disables top-left
            if (t->by & (lh4 - 1)) have_topleft = 0;
        } else {
            unsigned mask = 1 << lh4;
            for (int y = lh4; y < h4; y += lh4) {
                r2 += lh4;
                if (matches(&r2[0][t->bx - 1])) {
                    masks[1] |= mask;
                    if (++count >= 8) return ;
                }
                lh4 = bs(&r2[0][t->bx - 1])[1];
                mask <<= lh4;
            }
        }
    }
    if (have_topleft && matches(&r[-1][t->bx - 1])) {
        masks[1] |= 1ULL << 32;
        if (++count >= 8) return ;
    }
    if (have_topright && matches(&r[-1][t->bx + bw4])) {
        masks[0] |= 1ULL << 32;
    }
#undef matches
}
// Derives local warp (affine) motion parameters from the motion vectors of
// neighbouring blocks flagged in masks[] by find_matching_ref(). Candidate
// (source, source+MV) point pairs are gathered, outliers rejected against
// a size-dependent threshold, and the remainder least-squares-fitted. Falls
// back to DAV1D_WM_TYPE_IDENTITY when the fit or the shear test fails.
static void derive_warpmv(const Dav1dTaskContext *const t,
                          const int bw4, const int bh4,
                          const uint64_t masks[2], const union mv mv,
                          Dav1dWarpedMotionParams *const wmp)
{
    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
// append one (position, position+MV) sample; coordinates are relative to
// the block and centred on the neighbour block (sx/sy select the side)
#define add_sample(dx, dy, sx, sy, rp) do { \
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
    np++; \
} while (0)
    // use masks[] to find the projectable motion vectors in the edges
    if ((unsigned ) masks[0] == 1 && !(masks[1] >> 32)) {
        // only the first above block matches; centre the sample on it
        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
        const int tz = ctz(xmask);
        off += tz;
        xmask >>= tz;
        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
        xmask &= ~1;
    }
    if (np < 8 && masks[1] == 1) {
        // only the first left block matches
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
        const int tz = ctz(ymask);
        off += tz;
        ymask >>= tz;
        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
        ymask &= ~1;
    }
    if (np < 8 && masks[1] >> 32) // top/left
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
    if (np < 8 && masks[0] >> 32) // top/right
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
    assert(np > 0 && np <= 8);
#undef bs
    // select according to motion vector difference against a threshold
    int mvd[8], ret = 0;
    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
    for (int i = 0; i < np; i++) {
        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
        if (mvd[i] > thresh)
            mvd[i] = -1; // mark as outlier
        else
            ret++;
    }
    if (!ret) {
        // all samples rejected: keep a single one so the fit has input
        ret = 1;
    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
        // compact the retained samples towards the front of pts[]
        while (mvd[i] != -1) i++;
        while (mvd[j] == -1) j--;
        assert(i != j);
        if (i > j) break ;
        // replace the discarded samples;
        mvd[i] = mvd[j];
        memcpy(pts[i], pts[j], sizeof (*pts));
    }
    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
        !dav1d_get_shear_params(wmp))
    {
        wmp->type = DAV1D_WM_TYPE_AFFINE;
    } else
        wmp->type = DAV1D_WM_TYPE_IDENTITY;
}
// Returns 1 if any of the `len` entries at even offsets (buf[0], buf[2],
// ..., buf[2*(len-1)]) is zero, 0 otherwise.
static inline int findoddzero(const uint8_t *buf, int len) {
    const uint8_t *const end = buf + 2 * len;
    for (const uint8_t *p = buf; p < end; p += 2)
        if (*p == 0)
            return 1;
    return 0;
}
// meant to be SIMD'able, so that theoretical complexity of this function
// times block size goes from w4*h4 to w4+h4-1
// Operates on one top/left-to-bottom/right anti-diagonal ("wave-front") of
// the palette index map: for each member of diagonal i (columns first down
// to last), the top, left and top-left neighbours determine a symbol
// ordering and a coding context.
// NOTE(review): the previous wording here described parameters (a, b,
// top_offset, left_offset) that no longer exist in this signature.
// output is order[] and ctx for each member of this diagonal.
// Computes, for each entry of anti-diagonal i of the palette index map, the
// neighbour-derived symbol ordering order[n][0..7] and the coding context
// ctx[n] used to decode that entry's palette index.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)
{
    int have_top = i > first;
    assert(pal_idx);
    // start at column `first` of diagonal i; each step moves one row down
    // and one column left (stride - 1)
    pal_idx += first + (i - first) * stride;
    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
        const int have_left = j > 0;
        assert(have_left || have_top);
// append palette symbol v to the front of order[n] and mark it as used
#define add(v_in) do { \
        const int v = v_in; \
        assert((unsigned )v < 8U); \
        order[n][o_idx++] = v; \
        mask |= 1 << v; \
    } while (0)
        unsigned mask = 0;
        int o_idx = 0;
        if (!have_left) {
            // top row: only the top neighbour exists
            ctx[n] = 0;
            add(pal_idx[-stride]);
        } else if (!have_top) {
            // left column: only the left neighbour exists
            ctx[n] = 0;
            add(pal_idx[-1]);
        } else {
            // left, top and top-left all exist; the context depends on
            // which of them agree
            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
            const int same_t_l = t == l;
            const int same_t_tl = t == tl;
            const int same_l_tl = l == tl;
            const int same_all = same_t_l & same_t_tl & same_l_tl;
            if (same_all) {
                ctx[n] = 4;
                add(t);
            } else if (same_t_l) {
                ctx[n] = 3;
                add(t);
                add(tl);
            } else if (same_t_tl | same_l_tl) {
                ctx[n] = 2;
                add(tl);
                add(same_t_tl ? l : t);
            } else {
                ctx[n] = 1;
                add(imin(t, l));
                add(imax(t, l));
                add(tl);
            }
        }
        // fill the remaining slots with the unused symbols in ascending order
        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
            if (!(mask & m))
                order[n][o_idx++] = bit;
        assert(o_idx == 8);
#undef add
    }
}
// Decodes the palette index map for one plane set. Indices are coded along
// anti-diagonals ("wave-fronts") so that each symbol's context is available
// from its already-decoded top/left neighbours; the scratch map is then
// packed into pal_idx by pal_idx_finish().
static void read_pal_indices(Dav1dTaskContext *const t,
                             uint8_t *const pal_idx,
                             const int pal_sz, const int pl,
                             const int w4, const int h4,
                             const int bw4, const int bh4)
{
    Dav1dTileState *const ts = t->ts;
    const ptrdiff_t stride = bw4 * 4;
    assert(pal_idx);
    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
    // the first (top-left) index has no neighbours and is coded uniformly
    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
    uint16_t (*const color_map_cdf)[8] =
        ts->cdf.m.color_map[pl][pal_sz - 2];
    uint8_t (*const order)[8] = t->scratch.pal_order;
    uint8_t *const ctx = t->scratch.pal_ctx;
    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
        // top/left-to-bottom/right diagonals ("wave-front")
        const int first = imin(i, w4 * 4 - 1);
        const int last = imax(0, i - h4 * 4 + 1);
        order_palette(pal_tmp, stride, i, first, last, order, ctx);
        for (int j = first, m = 0; j >= last; j--, m++) {
            // the coded symbol indexes the neighbour-derived ordering,
            // not the palette directly
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                      color_map_cdf[ctx[m]], pal_sz - 1);
            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
        }
    }
    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
                                 w4 * 4, h4 * 4);
}
// Reads the variable transform-size information for an inter block:
// determines max_ytx/uvtx and, when per-block trees are coded, the split
// masks stored in b->tx_split0/1. Also updates the above/left tx contexts.
static void read_vartx_tree(Dav1dTaskContext *const t,
                            Av1Block *const b, const enum BlockSize bs,
                            const int bx4, const int by4)
{
    const Dav1dFrameContext *const f = t->f;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bw4 = b_dim[0], bh4 = b_dim[1];
    // var-tx tree coding
    uint16_t tx_split[2] = { 0 };
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
                     b->max_ytx == TX_4X4))
    {
        // lossless (or minimum-size) blocks always use 4x4 transforms
        b->max_ytx = b->uvtx = TX_4X4;
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
        }
    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
        // no split tree coded: the whole block uses the maximum tx size
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
        }
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    } else {
        // read one split tree per max-size tx unit covering the block,
        // temporarily advancing t->bx/t->by for each unit
        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
        int y, x, y_off, x_off;
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
                // contexts are updated inside read_tx_tree()
                t->bx += ytx->w;
            }
            t->bx -= x;
            t->by += ytx->h;
        }
        t->by -= y;
        if (DEBUG_BLOCK_INFO)
            printf("Post-vartxtree[%x/%x]: r=%d\n" ,
                   tx_split[0], tx_split[1], t->ts->msac.rng);
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    }
    assert(!(tx_split[0] & ~0x33));
    b->tx_split0 = (uint8_t)tx_split[0];
    b->tx_split1 = tx_split[1];
}
// Returns the minimum segment id found in the co-located w4*h4 region of
// the previous frame's segmentation map (ids are 0..7). Stops scanning
// rows early once the minimum reaches 0.
static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
                                            const int by, const int bx,
                                            const int w4, int h4,
                                            const uint8_t *ref_seg_map,
                                            const ptrdiff_t stride)
{
    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
    const uint8_t *row = &ref_seg_map[by * stride + bx];
    unsigned seg_id = 8; // larger than any valid id
    do {
        for (int x = 0; x < w4; x++)
            if (row[x] < seg_id) seg_id = row[x];
        row += stride;
    } while (--h4 > 0 && seg_id);
    assert(seg_id < 8);
    return seg_id;
}
// Broadcasts a single-reference block's MV into the refmvs grid so later
// blocks can use it as a spatial MV predictor.
static inline void splat_oneref_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    const enum InterPredMode mode = b->inter_mode;
    // mf bit 0: GLOBALMV on a block of at least 8x8; bit 1: NEWMV
    const int gmv_flag = mode == GLOBALMV && imin(bw4, bh4) >= 2;
    const int newmv_flag = (mode == NEWMV) << 1;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .bs = bs,
        .mf = gmv_flag | newmv_flag,
        .mv.mv[0] = b->mv[0],
        // second ref slot is unused (-1), except 0 for inter-intra blocks
        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}
// Broadcasts an intra-block-copy vector into the refmvs grid; IBC blocks
// use reference 0 and have no second reference or mode flags.
static inline void splat_intrabc_mv(const Dav1dContext *const c,
                                    Dav1dTaskContext *const t,
                                    const enum BlockSize bs,
                                    const Av1Block *const b,
                                    const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .bs = bs,
        .mf = 0,
        .mv.mv[0] = b->mv[0],
        .ref.ref = { 0, -1 },
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}
// Broadcasts a compound (two-reference) block's MV pair into the refmvs
// grid for use as a spatial MV predictor.
static inline void splat_tworef_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    // compound prediction requires at least an 8x8 block
    assert(bw4 >= 2 && bh4 >= 2);
    const enum CompInterPredMode mode = b->inter_mode;
    // mf bit 0: fully global motion; bit 1: mode is in the 0xbc mode mask
    const int gmv_flag = mode == GLOBALMV_GLOBALMV;
    const int newmv_flag = (((1 << mode) & 0xbc) != 0) << 1;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .bs = bs,
        .mf = gmv_flag | newmv_flag,
        .mv.mv = { b->mv[0], b->mv[1] },
        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}
// Fills the refmvs grid for an intra block: reference 0 with an invalid MV
// so neighbours know there is no usable motion here.
static inline void splat_intraref(const Dav1dContext *const c,
                                  Dav1dTaskContext *const t,
                                  const enum BlockSize bs,
                                  const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .bs = bs,
        .mf = 0,
        .mv.mv[0].n = INVALID_MV,
        .ref.ref = { 0, -1 },
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}
// Accumulates into *dst the lowest (greatest) reference row that motion
// compensation for a block of height bh4 (in 4px units) at vertical
// position by4 with vertical MV mvy may read. ss_ver is the vertical
// chroma subsampling shift.
static void mc_lowest_px(int *const dst, const int by4, const int bh4,
                         const int mvy, const int ss_ver,
                         const struct ScalableMotionParams *const smp)
{
    const int v_mul = 4 >> ss_ver;
    if (!smp->scale) {
        // unscaled reference: integer MV part plus 4 rows of filter margin
        // whenever the MV has a fractional part
        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);
    } else {
        // scaled reference: map the block's top edge through the scale
        // factor (0x4000 presumably being unity scale), then advance by
        // smp->step per output row; +1 rounding row and +4 filter margin
        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
        y = apply_sign64((int )((llabs(tmp) + 128) >> 8), tmp) + 32;
        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
        *dst = imax(*dst, bottom);
    }
}
// Warped-motion variant of mc_lowest_px(): accumulates into *dst the lowest
// reference row the affine transform may read for this block, evaluating
// the left- and right-most 8x8 sub-blocks of the bottom row.
static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp,
                                           const int ss_ver, const int ss_hor)
{
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    // warp operates on whole 8x8 sub-blocks in the subsampled plane
    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
    const int32_t *const mat = wmp->matrix;
    const int y = b_dim[1] * v_mul - 8; // lowest y
    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
    // check left- and right-most blocks
    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
        // calculate transformation relative to center of 8x8 block in
        // luma pixel units
        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
        const int dy = (int ) (mvy >> 16) - 4;
        *dst = imax(*dst, dy + 4 + 8);
    }
}
// Luma (non-subsampled) wrapper around affine_lowest_px().
static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp)
{
    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);
}
// Chroma wrapper around affine_lowest_px(): selects the subsampling shifts
// matching the frame's pixel layout (I444 chroma behaves like luma).
static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst,
                                             const uint8_t *const b_dim,
                                             const Dav1dWarpedMotionParams *const wmp)
{
    const Dav1dFrameContext *const f = t->f;
    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
        affine_lowest_px_luma(t, dst, b_dim, wmp);
    else
        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);
}
// For OBMC (overlapped block motion compensation), accumulates into dst[]
// (indexed [reference][luma/chroma]) the lowest reference rows that the
// overlapping predictions borrowed from the above and left neighbours may
// read. Mirrors the neighbour-selection logic of the OBMC prediction path.
static void obmc_lowest_px(Dav1dTaskContext *const t,
                           int (*const dst)[2], const int is_chroma,
                           const uint8_t *const b_dim,
                           const int bx4, const int by4, const int w4, const int h4)
{
    assert(!(t->bx & 1) && !(t->by & 1));
    const Dav1dFrameContext *const f = t->f;
    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    if (t->by > t->ts->tiling.row_start &&
        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
    {
        // up to 4 overlapping inter blocks from the row above
        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
            if (a_r->ref.ref[0] > 0) {
                const int oh4 = imin(b_dim[1], 16) >> 1;
                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
                             &f->svc[a_r->ref.ref[0] - 1][1]);
                i++;
            }
            x += imax(a_b_dim[0], 2);
        }
    }
    if (t->bx > t->ts->tiling.col_start)
        // up to 4 overlapping inter blocks from the column to the left
        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
            if (l_r->ref.ref[0] > 0) {
                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
                             &f->svc[l_r->ref.ref[0] - 1][1]);
                i++;
            }
            y += imax(l_b_dim[1], 2);
        }
}
static int decode_b(Dav1dTaskContext *const t,
const enum BlockLevel bl,
const enum BlockSize bs,
const enum BlockPartition bp,
const enum EdgeFlags intra_edge_flags) {
Dav1dTileState *const ts = t->ts;
const Dav1dFrameContext *const f = t->f;
Av1Block b_mem, *const b = t->frame_thread.pass ?
&f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bx4 = t->bx & 31, by4 = t->by & 31;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const int bw4 = b_dim[0], bh4 = b_dim[1];
const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
const int have_left = t->bx > ts->tiling.col_start;
const int have_top = t->by > ts->tiling.row_start;
const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
(bw4 > ss_hor || t->bx & 1) &&
(bh4 > ss_ver || t->by & 1);
if (t->frame_thread.pass == 2) {
if (b->intra) {
f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
const enum IntraPredMode y_mode_nofilt =
b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
#define set_ctx(rep_macro) \
rep_macro(edge->mode, off, y_mode_nofilt); \
rep_macro(edge->intra, off, 1)
BlockContext *edge = t->a;
for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
case_set(b_dim[2 + i]);
}
#undef set_ctx
if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
for (int x = 0; x < bw4; x++) {
r[x].ref.ref[0] = 0;
r[x].bs = bs;
}
refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
for (int y = 0; y < bh4 - 1; y++) {
rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
rr[y][t->bx + bw4 - 1].bs = bs;
}
}
if (has_chroma) {
uint8_t uv_mode = b->uv_mode;
dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
}
} else {
if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
{
if (b->matrix[0] == SHRT_MIN) {
t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
} else {
t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
t->warpmv.matrix[3] = b->matrix[1];
t->warpmv.matrix[4] = b->matrix[2];
t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
t->bx, t->by);
dav1d_get_shear_params(&t->warpmv);
#define signabs(v) v < 0 ? '-' : ' ' , abs(v)
if (DEBUG_BLOCK_INFO)
printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
"alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n" ,
signabs(t->warpmv.matrix[0]),
signabs(t->warpmv.matrix[1]),
signabs(t->warpmv.matrix[2]),
signabs(t->warpmv.matrix[3]),
signabs(t->warpmv.matrix[4]),
signabs(t->warpmv.matrix[5]),
signabs(t->warpmv.u.p.alpha),
signabs(t->warpmv.u.p.beta),
signabs(t->warpmv.u.p.gamma),
signabs(t->warpmv.u.p.delta),
b->mv2d.y, b->mv2d.x);
#undef signabs
}
}
if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
BlockContext *edge = t->a;
for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
#define set_ctx(rep_macro) \
rep_macro(edge->filter[0], off, filter[0]); \
rep_macro(edge->filter[1], off, filter[1]); \
rep_macro(edge->intra, off, 0)
case_set(b_dim[2 + i]);
#undef set_ctx
}
if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
for (int x = 0; x < bw4; x++) {
r[x].ref.ref[0] = b->ref[0] + 1;
r[x].mv.mv[0] = b->mv[0];
r[x].bs = bs;
}
refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
for (int y = 0; y < bh4 - 1; y++) {
rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
rr[y][t->bx + bw4 - 1].bs = bs;
}
}
if (has_chroma) {
dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
}
}
return 0;
}
const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
b->bl = bl;
b->bp = bp;
b->bs = bs;
const Dav1dSegmentationData *seg = NULL;
// segment_id (if seg_feature for skip/ref/gmv is enabled)
int seg_pred = 0;
if (f->frame_hdr->segmentation.enabled) {
if (!f->frame_hdr->segmentation.update_map) {
if (f->prev_segmap) {
unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
f->prev_segmap,
f->b4_stride);
if (seg_id >= 8) return -1;
b->seg_id = seg_id;
} else {
b->seg_id = 0;
}
seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
} else if (f->frame_hdr->segmentation.seg_data.preskip) {
if (f->frame_hdr->segmentation.temporal &&
(seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
t->l.seg_pred[by4]])))
{
// temporal predicted seg_id
if (f->prev_segmap) {
unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
w4, h4,
f->prev_segmap,
f->b4_stride);
if (seg_id >= 8) return -1;
b->seg_id = seg_id;
} else {
b->seg_id = 0;
}
} else {
int seg_ctx;
const unsigned pred_seg_id =
get_cur_frame_segid(t->by, t->bx, have_top, have_left,
&seg_ctx, f->cur_segmap, f->b4_stride);
const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.seg_id[seg_ctx],
DAV1D_MAX_SEGMENTS - 1);
const unsigned last_active_seg_id =
f->frame_hdr->segmentation.seg_data.last_active_segid;
b->seg_id = neg_deinterleave(diff, pred_seg_id,
last_active_seg_id + 1);
if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
}
if (DEBUG_BLOCK_INFO)
printf("Post-segid[preskip;%d]: r=%d\n" ,
b->seg_id, ts->msac.rng);
seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
}
} else {
b->seg_id = 0;
}
// skip_mode
if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
{
const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.skip_mode[smctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-skipmode[%d]: r=%d\n" , b->skip_mode, ts->msac.rng);
} else {
b->skip_mode = 0;
}
// skip
if (b->skip_mode || (seg && seg->skip)) {
b->skip = 1;
} else {
const int sctx = t->a->skip[bx4] + t->l.skip[by4];
b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-skip[%d]: r=%d\n" , b->skip, ts->msac.rng);
}
// segment_id
if (f->frame_hdr->segmentation.enabled &&
f->frame_hdr->segmentation.update_map &&
!f->frame_hdr->segmentation.seg_data.preskip)
{
if (!b->skip && f->frame_hdr->segmentation.temporal &&
(seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
t->l.seg_pred[by4]])))
{
// temporal predicted seg_id
if (f->prev_segmap) {
unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
f->prev_segmap,
f->b4_stride);
if (seg_id >= 8) return -1;
b->seg_id = seg_id;
} else {
b->seg_id = 0;
}
} else {
int seg_ctx;
const unsigned pred_seg_id =
get_cur_frame_segid(t->by, t->bx, have_top, have_left,
&seg_ctx, f->cur_segmap, f->b4_stride);
if (b->skip) {
b->seg_id = pred_seg_id;
} else {
const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.seg_id[seg_ctx],
DAV1D_MAX_SEGMENTS - 1);
const unsigned last_active_seg_id =
f->frame_hdr->segmentation.seg_data.last_active_segid;
b->seg_id = neg_deinterleave(diff, pred_seg_id,
last_active_seg_id + 1);
if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
}
if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
}
seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
if (DEBUG_BLOCK_INFO)
printf("Post-segid[postskip;%d]: r=%d\n" ,
b->seg_id, ts->msac.rng);
}
// cdef index
if (!b->skip) {
const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
((t->by & 16) >> 3) : 0;
if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
const int v = dav1d_msac_decode_bools(&ts->msac,
f->frame_hdr->cdef.n_bits);
t->cur_sb_cdef_idx_ptr[idx] = v;
if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
if (DEBUG_BLOCK_INFO)
printf("Post-cdef_idx[%d]: r=%d\n" ,
*t->cur_sb_cdef_idx_ptr, ts->msac.rng);
}
}
// delta-q/lf
if (!(t->bx & (31 >> !f->seq_hdr->sb128)) &&
!(t->by & (31 >> !f->seq_hdr->sb128)))
{
const int prev_qidx = ts->last_qidx;
const int have_delta_q = f->frame_hdr->delta.q.present &&
(bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
uint32_t prev_delta_lf = ts->last_delta_lf.u32;
if (have_delta_q) {
int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.delta_q, 3);
if (delta_q == 3) {
const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
1 + (1 << n_bits);
}
if (delta_q) {
if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
}
ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
if (have_delta_q && DEBUG_BLOCK_INFO)
printf("Post-delta_q[%d->%d]: r=%d\n" ,
delta_q, ts->last_qidx, ts->msac.rng);
if (f->frame_hdr->delta.lf.present) {
const int n_lfs = f->frame_hdr->delta.lf.multi ?
f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
for (int i = 0; i < n_lfs; i++) {
int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
if (delta_lf == 3) {
const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
1 + (1 << n_bits);
}
if (delta_lf) {
if (dav1d_msac_decode_bool_equi(&ts->msac))
delta_lf = -delta_lf;
delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
}
ts->last_delta_lf.i8[i] =
iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
if (have_delta_q && DEBUG_BLOCK_INFO)
printf("Post-delta_lf[%d:%d]: r=%d\n" , i, delta_lf,
ts->msac.rng);
}
}
}
if (ts->last_qidx == f->frame_hdr->quant.yac) {
// assign frame-wide q values to this sb
ts->dq = f->dq;
} else if (ts->last_qidx != prev_qidx) {
// find sb-specific quant parameters
init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
ts->dq = ts->dqmem;
}
if (!ts->last_delta_lf.u32) {
// assign frame-wide lf values to this sb
ts->lflvl = f->lf.lvl;
} else if (ts->last_delta_lf.u32 != prev_delta_lf) {
// find sb-specific lf lvl parameters
ts->lflvl = ts->lflvlmem;
dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
}
}
if (b->skip_mode) {
b->intra = 0;
} else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
if (seg && (seg->ref >= 0 || seg->globalmv)) {
b->intra = !seg->ref;
} else {
const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.intra[ictx]);
if (DEBUG_BLOCK_INFO)
printf("Post-intra[%d]: r=%d\n" , b->intra, ts->msac.rng);
}
} else if (f->frame_hdr->allow_intrabc) {
b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
if (DEBUG_BLOCK_INFO)
printf("Post-intrabcflag[%d]: r=%d\n" , b->intra, ts->msac.rng);
} else {
b->intra = 1;
}
// intra/inter-specific stuff
if (b->intra) {
uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
[dav1d_intra_mode_context[t->l.mode[by4]]];
b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
N_INTRA_PRED_MODES - 1);
if (DEBUG_BLOCK_INFO)
printf("Post-ymode[%d]: r=%d\n" , b->y_mode, ts->msac.rng);
// angle delta
if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
b->y_mode <= VERT_LEFT_PRED)
{
uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
b->y_angle = angle - 3;
} else {
b->y_angle = 0;
}
if (has_chroma) {
const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
if (DEBUG_BLOCK_INFO)
printf("Post-uvmode[%d]: r=%d\n" , b->uv_mode, ts->msac.rng);
b->uv_angle = 0;
if (b->uv_mode == CFL_PRED) {
#define SIGN(a) (!!(a) + ((a) > 0))
const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.cfl_sign, 7) + 1;
const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
assert(sign_u == sign / 3);
if (sign_u) {
const int ctx = (sign_u == 2) * 3 + sign_v;
b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.cfl_alpha[ctx], 15) + 1;
if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
} else {
b->cfl_alpha[0] = 0;
}
if (sign_v) {
const int ctx = (sign_v == 2) * 3 + sign_u;
b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.cfl_alpha[ctx], 15) + 1;
if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
} else {
b->cfl_alpha[1] = 0;
}
#undef SIGN
if (DEBUG_BLOCK_INFO)
printf("Post-uvalphas[%d/%d]: r=%d\n" ,
b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
} else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
b->uv_mode <= VERT_LEFT_PRED)
{
uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
b->uv_angle = angle - 3;
}
}
b->pal_sz[0] = b->pal_sz[1] = 0;
if (f->frame_hdr->allow_screen_content_tools &&
imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
{
const int sz_ctx = b_dim[2] + b_dim[3] - 2;
if (b->y_mode == DC_PRED) {
const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-y_pal[%d]: r=%d\n" , use_y_pal, ts->msac.rng);
if (use_y_pal)
f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
}
if (has_chroma && b->uv_mode == DC_PRED) {
const int pal_ctx = b->pal_sz[0] > 0;
const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.pal_uv[pal_ctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-uv_pal[%d]: r=%d\n" , use_uv_pal, ts->msac.rng);
if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
}
}
if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
{
const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.use_filter_intra[bs]);
if (is_filter) {
b->y_mode = FILTER_PRED;
b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.filter_intra, 4);
}
if (DEBUG_BLOCK_INFO)
printf("Post-filterintramode[%d/%d]: r=%d\n" ,
b->y_mode, b->y_angle, ts->msac.rng);
}
if (b->pal_sz[0]) {
uint8_t *pal_idx;
if (t->frame_thread.pass) {
const int p = t->frame_thread.pass & 1;
assert(ts->frame_thread[p].pal_idx);
pal_idx = ts->frame_thread[p].pal_idx;
ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
} else
pal_idx = t->scratch.pal_idx_y;
read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
if (DEBUG_BLOCK_INFO)
printf("Post-y-pal-indices: r=%d\n" , ts->msac.rng);
}
if (has_chroma && b->pal_sz[1]) {
uint8_t *pal_idx;
if (t->frame_thread.pass) {
const int p = t->frame_thread.pass & 1;
assert(ts->frame_thread[p].pal_idx);
pal_idx = ts->frame_thread[p].pal_idx;
ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
} else
pal_idx = t->scratch.pal_idx_uv;
read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
if (DEBUG_BLOCK_INFO)
printf("Post-uv-pal-indices: r=%d\n" , ts->msac.rng);
}
const TxfmInfo *t_dim;
if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
b->tx = b->uvtx = (int ) TX_4X4;
t_dim = &dav1d_txfm_dimensions[TX_4X4];
} else {
b->tx = dav1d_max_txfm_size_for_bs[bs][0];
b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
t_dim = &dav1d_txfm_dimensions[b->tx];
if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
imin(t_dim->max, 2));
while (depth--) {
b->tx = t_dim->sub;
t_dim = &dav1d_txfm_dimensions[b->tx];
}
}
if (DEBUG_BLOCK_INFO)
printf("Post-tx[%d]: r=%d\n" , b->tx, ts->msac.rng);
}
// reconstruction
if (t->frame_thread.pass == 1) {
f->bd_fn.read_coef_blocks(t, bs, b);
} else {
f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
}
if (f->frame_hdr->loopfilter.level_y[0] ||
f->frame_hdr->loopfilter.level_y[1])
{
dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
(const uint8_t (*)[8][2])
&ts->lflvl[b->seg_id][0][0][0],
t->bx, t->by, f->w4, f->h4, bs,
b->tx, b->uvtx, f->cur.p.layout,
&t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
}
// update contexts
const enum IntraPredMode y_mode_nofilt =
b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
BlockContext *edge = t->a;
for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
#define set_ctx(rep_macro) \
rep_macro(edge->tx_intra, off, t_lsz); \
rep_macro(edge->tx, off, t_lsz); \
rep_macro(edge->mode, off, y_mode_nofilt); \
rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
rep_macro(edge->seg_pred, off, seg_pred); \
rep_macro(edge->skip_mode, off, 0); \
rep_macro(edge->intra, off, 1); \
rep_macro(edge->skip, off, b->skip); \
/* see aomedia bug 2183 for why we use luma coordinates here */ \
rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
}
case_set(b_dim[2 + i]);
#undef set_ctx
}
if (b->pal_sz[0])
f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
if (has_chroma) {
uint8_t uv_mode = b->uv_mode;
dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
if (b->pal_sz[1])
f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
}
if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
splat_intraref(f->c, t, bs, bw4, bh4);
} else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
// intra block copy
refmvs_candidate mvstack[8];
int n_mvs, ctx;
dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
(union refmvs_refpair) { .ref = { 0, -1 }},
bs, intra_edge_flags, t->by, t->bx);
if (mvstack[0].mv.mv[0].n)
b->mv[0] = mvstack[0].mv.mv[0];
else if (mvstack[1].mv.mv[0].n)
b->mv[0] = mvstack[1].mv.mv[0];
else {
if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
b->mv[0].y = 0;
b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
} else {
b->mv[0].y = -(512 << f->seq_hdr->sb128);
b->mv[0].x = 0;
}
}
const union mv ref = b->mv[0];
read_mv_residual(ts, &b->mv[0], -1);
// clip intrabc motion vector to decoded parts of current tile
int border_left = ts->tiling.col_start * 4;
int border_top = ts->tiling.row_start * 4;
if (has_chroma) {
if (bw4 < 2 && ss_hor)
border_left += 4;
if (bh4 < 2 && ss_ver)
border_top += 4;
}
int src_left = t->bx * 4 + (b->mv[0].x >> 3);
int src_top = t->by * 4 + (b->mv[0].y >> 3);
int src_right = src_left + bw4 * 4;
int src_bottom = src_top + bh4 * 4;
const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
// check against left or right tile boundary and adjust if necessary
if (src_left < border_left) {
src_right += border_left - src_left;
src_left += border_left - src_left;
} else if (src_right > border_right) {
src_left -= src_right - border_right;
src_right -= src_right - border_right;
}
// check against top tile boundary and adjust if necessary
if (src_top < border_top) {
src_bottom += border_top - src_top;
src_top += border_top - src_top;
}
const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
const int sb_size = 1 << (6 + f->seq_hdr->sb128);
// check for overlap with current superblock
if (src_bottom > sby && src_right > sbx) {
if (src_top - border_top >= src_bottom - sby) {
// if possible move src up into the previous suberblock row
src_top -= src_bottom - sby;
src_bottom -= src_bottom - sby;
} else if (src_left - border_left >= src_right - sbx) {
// if possible move src left into the previous suberblock
src_left -= src_right - sbx;
src_right -= src_right - sbx;
}
}
// move src up if it is below current superblock row
if (src_bottom > sby + sb_size) {
src_top -= src_bottom - (sby + sb_size);
src_bottom -= src_bottom - (sby + sb_size);
}
// error out if mv still overlaps with the current superblock
if (src_bottom > sby && src_right > sbx)
return -1;
b->mv[0].x = (src_left - t->bx * 4) * 8;
b->mv[0].y = (src_top - t->by * 4) * 8;
if (DEBUG_BLOCK_INFO)
printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n" ,
b->mv[0].y, b->mv[0].x, ref.y, ref.x,
mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
read_vartx_tree(t, b, bs, bx4, by4);
// reconstruction
if (t->frame_thread.pass == 1) {
f->bd_fn.read_coef_blocks(t, bs, b);
b->filter2d = FILTER_2D_BILINEAR;
} else {
if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
}
splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
BlockContext *edge = t->a;
for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
#define set_ctx(rep_macro) \
rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
rep_macro(edge->mode, off, DC_PRED); \
rep_macro(edge->pal_sz, off, 0); \
/* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
rep_macro(t->pal_sz_uv[i], off, 0); \
rep_macro(edge->seg_pred, off, seg_pred); \
rep_macro(edge->skip_mode, off, 0); \
rep_macro(edge->intra, off, 0); \
rep_macro(edge->skip, off, b->skip)
case_set(b_dim[2 + i]);
#undef set_ctx
}
if (has_chroma) {
dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
}
} else {
// inter-specific mode/mv coding
int is_comp, has_subpel_filter;
if (b->skip_mode) {
is_comp = 1;
} else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
{
const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp[ctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-compflag[%d]: r=%d\n" , is_comp, ts->msac.rng);
} else {
is_comp = 0;
}
if (b->skip_mode) {
b->ref[0] = f->frame_hdr->skip_mode_refs[0];
b->ref[1] = f->frame_hdr->skip_mode_refs[1];
b->comp_type = COMP_INTER_AVG;
b->inter_mode = NEARESTMV_NEARESTMV;
b->drl_idx = NEAREST_DRL;
has_subpel_filter = 0;
refmvs_candidate mvstack[8];
int n_mvs, ctx;
dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
(union refmvs_refpair) { .ref = {
b->ref[0] + 1, b->ref[1] + 1 }},
bs, intra_edge_flags, t->by, t->bx);
b->mv[0] = mvstack[0].mv.mv[0];
b->mv[1] = mvstack[0].mv.mv[1];
fix_mv_precision(f->frame_hdr, &b->mv[0]);
fix_mv_precision(f->frame_hdr, &b->mv[1]);
if (DEBUG_BLOCK_INFO)
printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n" ,
b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
b->ref[0], b->ref[1]);
} else if (is_comp) {
const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
if (dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_dir[dir_ctx]))
{
// bidir - first reference (fw)
const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
if (dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_fwd_ref[0][ctx1]))
{
const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_fwd_ref[2][ctx2]);
} else {
const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_fwd_ref[1][ctx2]);
}
// second reference (bw)
const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
if (dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_bwd_ref[0][ctx3]))
{
b->ref[1] = 6;
} else {
const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_bwd_ref[1][ctx4]);
}
} else {
// unidir
const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
if (dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_uni_ref[0][uctx_p]))
{
b->ref[0] = 4;
b->ref[1] = 6;
} else {
const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->ref[0] = 0;
b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_uni_ref[1][uctx_p1]);
if (b->ref[1] == 2) {
const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
have_top, have_left);
b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.comp_uni_ref[2][uctx_p2]);
}
}
}
if (DEBUG_BLOCK_INFO)
printf("Post-refs[%d/%d]: r=%d\n" ,
b->ref[0], b->ref[1], ts->msac.rng);
refmvs_candidate mvstack[8];
int n_mvs, ctx;
dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
(union refmvs_refpair) { .ref = {
b->ref[0] + 1, b->ref[1] + 1 }},
bs, intra_edge_flags, t->by, t->bx);
b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.comp_inter_mode[ctx],
N_COMP_INTER_PRED_MODES - 1);
if (DEBUG_BLOCK_INFO)
printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n" ,
b->inter_mode, ctx, n_mvs, ts->msac.rng);
const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
b->drl_idx = NEAREST_DRL;
if (b->inter_mode == NEWMV_NEWMV) {
if (n_mvs > 1) { // NEARER, NEAR or NEARISH
const int drl_ctx_v1 = get_drl_context(mvstack, 0);
b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.drl_bit[drl_ctx_v1]);
if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
const int drl_ctx_v2 = get_drl_context(mvstack, 1);
b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.drl_bit[drl_ctx_v2]);
}
if (DEBUG_BLOCK_INFO)
printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n" ,
b->drl_idx, n_mvs, ts->msac.rng);
}
} else if (im[0] == NEARMV || im[1] == NEARMV) {
b->drl_idx = NEARER_DRL;
if (n_mvs > 2) { // NEAR or NEARISH
const int drl_ctx_v2 = get_drl_context(mvstack, 1);
b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.drl_bit[drl_ctx_v2]);
if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
const int drl_ctx_v3 = get_drl_context(mvstack, 2);
/* NOTE(review): extraction artifact removed. The chunk was truncated here by
 * the tool that produced it ("maximum size reached" marker plus unrelated
 * processing metadata). The function continues in the upstream file
 * (dav1d src/decode.c) with the NEAR/NEARISH drl_idx refinement:
 * reading drl_ctx_v3 and the third drl bit. Restore from upstream; do not
 * treat this file as a complete translation unit. */