Quelle decode.c Sprache: C

/*
* Copyright © 2018-2021, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "config.h"

#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>

#include "dav1d/data.h"

#include "common/frame.h"
#include "common/intops.h"

#include "src/ctx.h"
#include "src/decode.h"
#include "src/dequant_tables.h"
#include "src/env.h"
#include "src/filmgrain.h"
#include "src/log.h"
#include "src/qm.h"
#include "src/recon.h"
#include "src/ref.h"
#include "src/tables.h"
#include "src/thread_task.h"
#include "src/warpmv.h"

static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
                              const Dav1dFrameHeader *const frame_hdr,
                              const int qidx, uint16_t (*dq)[3][2])
{
    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
        const int yac = frame_hdr->segmentation.enabled ?
            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);

        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
    }
}

static int read_mv_component_diff(MsacContext *const msac,
                                  CdfMvComponent *const mv_comp,
                                  const int mv_prec)
{
    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
    int up, fp = 3, hp = 1;

    if (!cl) {
        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
        if (mv_prec >= 0) {  // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
        }
    } else {
        up = 1 << cl;
        for (int n = 0; n < cl; n++)
            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
        if (mv_prec >= 0) {  // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
        }
    }

    const int diff = ((up << 3) | (fp << 1) | hp) + 1;

    return sign ? -diff : diff;
}

static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
                             const int mv_prec)
{
    MsacContext *const msac = &ts->msac;
    const enum MVJoint mv_joint =
        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
    if (mv_joint & MV_JOINT_V)
        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
    if (mv_joint & MV_JOINT_H)
        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
}

static void read_tx_tree(Dav1dTaskContext *const t,
                         const enum RectTxfmSize from,
                         const int depth, uint16_t *const masks,
                         const int x_off, const int y_off)
{
    const Dav1dFrameContext *const f = t->f;
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    const int txw = t_dim->lw, txh = t_dim->lh;
    int is_split;

    if (depth < 2 && from > (int) TX_4X4) {
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
        const int a = t->a->tx[bx4] < txw;
        const int l = t->l.tx[by4] < txh;

        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
                       t->ts->cdf.m.txpart[cat][a + l]);
        if (is_split)
            masks[depth] |= 1 << (y_off * 4 + x_off);
    } else {
        is_split = 0;
    }

    if (is_split && t_dim->max > TX_8X8) {
        const enum RectTxfmSize sub = t_dim->sub;
        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;

        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
        t->bx += txsw;
        if (txw >= txh && t->bx < f->bw)
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
        t->bx -= txsw;
        t->by += txsh;
        if (txh >= txw && t->by < f->bh) {
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
            t->bx += txsw;
            if (txw >= txh && t->bx < f->bw)
                read_tx_tree(t, sub, depth + 1, masks,
                             x_off * 2 + 1, y_off * 2 + 1);
            t->bx -= txsw;
        }
        t->by -= txsh;
    } else {
        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
    }
}

static int neg_deinterleave(int diff, int ref, int max) {
    if (!ref) return diff;
    if (ref >= (max - 1)) return max - diff - 1;
    if (2 * ref < max) {
        if (diff <= 2 * ref) {
            if (diff & 1)
                return ref + ((diff + 1) >> 1);
            else
                return ref - (diff >> 1);
        }
        return diff;
    } else {
        if (diff <= 2 * (max - ref - 1)) {
            if (diff & 1)
                return ref + ((diff + 1) >> 1);
            else
                return ref - (diff >> 1);
        }
        return max - (diff + 1);
    }
}

static void find_matching_ref(const Dav1dTaskContext *const t,
                              const enum EdgeFlags intra_edge_flags,
                              const int bw4, const int bh4,
                              const int w4, const int h4,
                              const int have_left, const int have_top,
                              const int ref, uint64_t masks[2])
{
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
    int count = 0;
    int have_topleft = have_top && have_left;
    int have_topright = imax(bw4, bh4) < 32 &&
                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);

#define bs(rp) dav1d_block_dimensions[(rp)->bs]
#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)

    if (have_top) {
        const refmvs_block *r2 = &r[-1][t->bx];
        if (matches(r2)) {
            masks[0] |= 1;
            count = 1;
        }
        int aw4 = bs(r2)[0];
        if (aw4 >= bw4) {
            const int off = t->bx & (aw4 - 1);
            if (off) have_topleft = 0;
            if (aw4 - off > bw4) have_topright = 0;
        } else {
            unsigned mask = 1 << aw4;
            for (int x = aw4; x < w4; x += aw4) {
                r2 += aw4;
                if (matches(r2)) {
                    masks[0] |= mask;
                    if (++count >= 8) return;
                }
                aw4 = bs(r2)[0];
                mask <<= aw4;
            }
        }
    }
    if (have_left) {
        /*const*/ refmvs_block *const *r2 = r;
        if (matches(&r2[0][t->bx - 1])) {
            masks[1] |= 1;
            if (++count >= 8) return;
        }
        int lh4 = bs(&r2[0][t->bx - 1])[1];
        if (lh4 >= bh4) {
            if (t->by & (lh4 - 1)) have_topleft = 0;
        } else {
            unsigned mask = 1 << lh4;
            for (int y = lh4; y < h4; y += lh4) {
                r2 += lh4;
                if (matches(&r2[0][t->bx - 1])) {
                    masks[1] |= mask;
                    if (++count >= 8) return;
                }
                lh4 = bs(&r2[0][t->bx - 1])[1];
                mask <<= lh4;
            }
        }
    }
    if (have_topleft && matches(&r[-1][t->bx - 1])) {
        masks[1] |= 1ULL << 32;
        if (++count >= 8) return;
    }
    if (have_topright && matches(&r[-1][t->bx + bw4])) {
        masks[0] |= 1ULL << 32;
    }
#undef matches
}

static void derive_warpmv(const Dav1dTaskContext *const t,
                          const int bw4, const int bh4,
                          const uint64_t masks[2], const union mv mv,
                          Dav1dWarpedMotionParams *const wmp)
{
    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];

#define add_sample(dx, dy, sx, sy, rp) do { \
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
    np++; \
} while (0)

    // use masks[] to find the projectable motion vectors in the edges
    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
        const int tz = ctz(xmask);
        off += tz;
        xmask >>= tz;
        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
        xmask &= ~1;
    }
    if (np < 8 && masks[1] == 1) {
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
        const int tz = ctz(ymask);
        off += tz;
        ymask >>= tz;
        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
        ymask &= ~1;
    }
    if (np < 8 && masks[1] >> 32) // top/left
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
    if (np < 8 && masks[0] >> 32) // top/right
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
    assert(np > 0 && np <= 8);
#undef bs

    // select according to motion vector difference against a threshold
    int mvd[8], ret = 0;
    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
    for (int i = 0; i < np; i++) {
        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
        if (mvd[i] > thresh)
            mvd[i] = -1;
        else
            ret++;
    }
    if (!ret) {
        ret = 1;
    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
        while (mvd[i] != -1) i++;
        while (mvd[j] == -1) j--;
        assert(i != j);
        if (i > j) break;
        // replace the discarded samples;
        mvd[i] = mvd[j];
        memcpy(pts[i], pts[j], sizeof(*pts));
    }

    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
        !dav1d_get_shear_params(wmp))
    {
        wmp->type = DAV1D_WM_TYPE_AFFINE;
    } else
        wmp->type = DAV1D_WM_TYPE_IDENTITY;
}

static inline int findoddzero(const uint8_t *buf, int len) {
    for (int n = 0; n < len; n++)
        if (!buf[n * 2]) return 1;
    return 0;
}

// meant to be SIMD'able, so that theoretical complexity of this function
// times block size goes from w4*h4 to w4+h4-1
// a and b are previous two lines containing (a) top/left entries or (b)
// top/left entries, with a[0] being either the first top or first left entry,
// depending on top_offset being 1 or 0, and b being the first top/left entry
// for whichever has one. left_offset indicates whether the (len-1)th entry
// has a left neighbour.
// output is order[] and ctx for each member of this diagonal.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)
{
    int have_top = i > first;

    assert(pal_idx);
    pal_idx += first + (i - first) * stride;
    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
        const int have_left = j > 0;

        assert(have_left || have_top);

#define add(v_in) do { \
        const int v = v_in; \
        assert((unsigned)v < 8U); \
        order[n][o_idx++] = v; \
        mask |= 1 << v; \
    } while (0)

        unsigned mask = 0;
        int o_idx = 0;
        if (!have_left) {
            ctx[n] = 0;
            add(pal_idx[-stride]);
        } else if (!have_top) {
            ctx[n] = 0;
            add(pal_idx[-1]);
        } else {
            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
            const int same_t_l = t == l;
            const int same_t_tl = t == tl;
            const int same_l_tl = l == tl;
            const int same_all = same_t_l & same_t_tl & same_l_tl;

            if (same_all) {
                ctx[n] = 4;
                add(t);
            } else if (same_t_l) {
                ctx[n] = 3;
                add(t);
                add(tl);
            } else if (same_t_tl | same_l_tl) {
                ctx[n] = 2;
                add(tl);
                add(same_t_tl ? l : t);
            } else {
                ctx[n] = 1;
                add(imin(t, l));
                add(imax(t, l));
                add(tl);
            }
        }
        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
            if (!(mask & m))
                order[n][o_idx++] = bit;
        assert(o_idx == 8);
#undef add
    }
}

static void read_pal_indices(Dav1dTaskContext *const t,
                             uint8_t *const pal_idx,
                             const int pal_sz, const int pl,
                             const int w4, const int h4,
                             const int bw4, const int bh4)
{
    Dav1dTileState *const ts = t->ts;
    const ptrdiff_t stride = bw4 * 4;
    assert(pal_idx);
    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
    uint16_t (*const color_map_cdf)[8] =
        ts->cdf.m.color_map[pl][pal_sz - 2];
    uint8_t (*const order)[8] = t->scratch.pal_order;
    uint8_t *const ctx = t->scratch.pal_ctx;
    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
        // top/left-to-bottom/right diagonals ("wave-front")
        const int first = imin(i, w4 * 4 - 1);
        const int last = imax(0, i - h4 * 4 + 1);
        order_palette(pal_tmp, stride, i, first, last, order, ctx);
        for (int j = first, m = 0; j >= last; j--, m++) {
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                      color_map_cdf[ctx[m]], pal_sz - 1);
            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
        }
    }

    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
                                 w4 * 4, h4 * 4);
}

static void read_vartx_tree(Dav1dTaskContext *const t,
                            Av1Block *const b, const enum BlockSize bs,
                            const int bx4, const int by4)
{
    const Dav1dFrameContext *const f = t->f;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bw4 = b_dim[0], bh4 = b_dim[1];

    // var-tx tree coding
    uint16_t tx_split[2] = { 0 };
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
                     b->max_ytx == TX_4X4))
    {
        b->max_ytx = b->uvtx = TX_4X4;
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
        }
    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
        }
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    } else {
        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
        int y, x, y_off, x_off;
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
                // contexts are updated inside read_tx_tree()
                t->bx += ytx->w;
            }
            t->bx -= x;
            t->by += ytx->h;
        }
        t->by -= y;
        if (DEBUG_BLOCK_INFO)
            printf("Post-vartxtree[%x/%x]: r=%d\n",
                   tx_split[0], tx_split[1], t->ts->msac.rng);
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    }
    assert(!(tx_split[0] & ~0x33));
    b->tx_split0 = (uint8_t)tx_split[0];
    b->tx_split1 = tx_split[1];
}

static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
                                            const int by, const int bx,
                                            const int w4, int h4,
                                            const uint8_t *ref_seg_map,
                                            const ptrdiff_t stride)
{
    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);

    unsigned seg_id = 8;
    ref_seg_map += by * stride + bx;
    do {
        for (int x = 0; x < w4; x++)
            seg_id = imin(seg_id, ref_seg_map[x]);
        ref_seg_map += stride;
    } while (--h4 > 0 && seg_id);
    assert(seg_id < 8);

    return seg_id;
}

static inline void splat_oneref_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    const enum InterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
        .mv.mv[0] = b->mv[0],
        .bs = bs,
        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_intrabc_mv(const Dav1dContext *const c,
                                    Dav1dTaskContext *const t,
                                    const enum BlockSize bs,
                                    const Av1Block *const b,
                                    const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0] = b->mv[0],
        .bs = bs,
        .mf = 0,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_tworef_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    assert(bw4 >= 2 && bh4 >= 2);
    const enum CompInterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
        .mv.mv = { b->mv[0], b->mv[1] },
        .bs = bs,
        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_intraref(const Dav1dContext *const c,
                                  Dav1dTaskContext *const t,
                                  const enum BlockSize bs,
                                  const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0].n = INVALID_MV,
        .bs = bs,
        .mf = 0,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static void mc_lowest_px(int *const dst, const int by4, const int bh4,
                         const int mvy, const int ss_ver,
                         const struct ScalableMotionParams *const smp)
{
    const int v_mul = 4 >> ss_ver;
    if (!smp->scale) {
        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);
    } else {
        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
        y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32;
        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
        *dst = imax(*dst, bottom);
    }
}

static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp,
                                           const int ss_ver, const int ss_hor)
{
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
    const int32_t *const mat = wmp->matrix;
    const int y = b_dim[1] * v_mul - 8; // lowest y

    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
    // check left- and right-most blocks
    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
        // calculate transformation relative to center of 8x8 block in
        // luma pixel units
        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
        const int dy = (int) (mvy >> 16) - 4;
        *dst = imax(*dst, dy + 4 + 8);
    }
}

static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp)
{
    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);
}

static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst,
                                             const uint8_t *const b_dim,
                                             const Dav1dWarpedMotionParams *const wmp)
{
    const Dav1dFrameContext *const f = t->f;
    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
        affine_lowest_px_luma(t, dst, b_dim, wmp);
    else
        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);
}

static void obmc_lowest_px(Dav1dTaskContext *const t,
                           int (*const dst)[2], const int is_chroma,
                           const uint8_t *const b_dim,
                           const int bx4, const int by4, const int w4, const int h4)
{
    assert(!(t->bx & 1) && !(t->by & 1));
    const Dav1dFrameContext *const f = t->f;
    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

    if (t->by > t->ts->tiling.row_start &&
        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
    {
        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];

            if (a_r->ref.ref[0] > 0) {
                const int oh4 = imin(b_dim[1], 16) >> 1;
                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
                             &f->svc[a_r->ref.ref[0] - 1][1]);
                i++;
            }
            x += imax(a_b_dim[0], 2);
        }
    }

    if (t->bx > t->ts->tiling.col_start)
        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];

            if (l_r->ref.ref[0] > 0) {
                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
                             &f->svc[l_r->ref.ref[0] - 1][1]);
                i++;
            }
            y += imax(l_b_dim[1], 2);
        }
}

static int decode_b(Dav1dTaskContext *const t,
                    const enum BlockLevel bl,
                    const enum BlockSize bs,
                    const enum BlockPartition bp,
                    const enum EdgeFlags intra_edge_flags) {
    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;
    Av1Block b_mem, *const b = t->frame_thread.pass ?
        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
    const int bw4 = b_dim[0], bh4 = b_dim[1];
    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
    const int have_left = t->bx > ts->tiling.col_start;
    const int have_top = t->by > ts->tiling.row_start;
    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
                           (bw4 > ss_hor || t->bx & 1) &&
                           (bh4 > ss_ver || t->by & 1);

    if (t->frame_thread.pass == 2) {
        if (b->intra) {
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);

            const enum IntraPredMode y_mode_nofilt =
                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
#define set_ctx(rep_macro) \
            rep_macro(edge->mode, off, y_mode_nofilt); \
            rep_macro(edge->intra, off, 1)
            BlockContext *edge = t->a;
            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
                case_set(b_dim[2 + i]);
            }
#undef set_ctx
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {
                    r[x].ref.ref[0] = 0;
                    r[x].bs = bs;
                }
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
                    rr[y][t->bx + bw4 - 1].bs = bs;
                }
            }

            if (has_chroma) {
                uint8_t uv_mode = b->uv_mode;
                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
            }
        } else {
            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
            {
                if (b->matrix[0] == SHRT_MIN) {
                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
                } else {
                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
                    t->warpmv.matrix[3] = b->matrix[1];
                    t->warpmv.matrix[4] = b->matrix[2];
                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
                                          t->bx, t->by);
                    dav1d_get_shear_params(&t->warpmv);
#define signabs(v) v < 0 ? '-' : ' ', abs(v)
                    if (DEBUG_BLOCK_INFO)
                        printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
                               signabs(t->warpmv.matrix[0]),
                               signabs(t->warpmv.matrix[1]),
                               signabs(t->warpmv.matrix[2]),
                               signabs(t->warpmv.matrix[3]),
                               signabs(t->warpmv.matrix[4]),
                               signabs(t->warpmv.matrix[5]),
                               signabs(t->warpmv.u.p.alpha),
                               signabs(t->warpmv.u.p.beta),
                               signabs(t->warpmv.u.p.gamma),
                               signabs(t->warpmv.u.p.delta),
                               b->mv2d.y, b->mv2d.x);
#undef signabs
                }
            }
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;

            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
            BlockContext *edge = t->a;
            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
#define set_ctx(rep_macro) \
                rep_macro(edge->filter[0], off, filter[0]); \
                rep_macro(edge->filter[1], off, filter[1]); \
                rep_macro(edge->intra, off, 0)
                case_set(b_dim[2 + i]);
#undef set_ctx
            }

            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {
                    r[x].ref.ref[0] = b->ref[0] + 1;
                    r[x].mv.mv[0] = b->mv[0];
                    r[x].bs = bs;
                }
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
                    rr[y][t->bx + bw4 - 1].bs = bs;
                }
            }

            if (has_chroma) {
                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
            }
        }
        return 0;
    }

    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;

    b->bl = bl;
    b->bp = bp;
    b->bs = bs;

    const Dav1dSegmentationData *seg = NULL;

    // segment_id (if seg_feature for skip/ref/gmv is enabled)
    int seg_pred = 0;
    if (f->frame_hdr->segmentation.enabled) {
        if (!f->frame_hdr->segmentation.update_map) {
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                       f->prev_segmap,
                                                       f->b4_stride);
                if (seg_id >= 8) return -1;
                b->seg_id = seg_id;
            } else {
                b->seg_id = 0;
            }
            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
            if (f->frame_hdr->segmentation.temporal &&
                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                                t->l.seg_pred[by4]])))
            {
                // temporal predicted seg_id
                if (f->prev_segmap) {
                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
                                                           w4, h4,
                                                           f->prev_segmap,
                                                           f->b4_stride);
                    if (seg_id >= 8) return -1;
                    b->seg_id = seg_id;
                } else {
                    b->seg_id = 0;
                }
            } else {
                int seg_ctx;
                const unsigned pred_seg_id =
                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                        &seg_ctx, f->cur_segmap, f->b4_stride);
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
            }

            if (DEBUG_BLOCK_INFO)
                printf("Post-segid[preskip;%d]: r=%d\n",
                       b->seg_id, ts->msac.rng);

            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
        }
    } else {
        b->seg_id = 0;
    }

    // skip_mode
    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
    {
        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
                           ts->cdf.m.skip_mode[smctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
    } else {
        b->skip_mode = 0;
    }

    // skip
    if (b->skip_mode || (seg && seg->skip)) {
        b->skip = 1;
    } else {
        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
    }

    // segment_id
    if (f->frame_hdr->segmentation.enabled &&
        f->frame_hdr->segmentation.update_map &&
        !f->frame_hdr->segmentation.seg_data.preskip)
    {
        if (!b->skip && f->frame_hdr->segmentation.temporal &&
            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                            t->l.seg_pred[by4]])))
        {
            // temporal predicted seg_id
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                       f->prev_segmap,
                                                       f->b4_stride);
                if (seg_id >= 8) return -1;
                b->seg_id = seg_id;
            } else {
                b->seg_id = 0;
            }
        } else {
            int seg_ctx;
            const unsigned pred_seg_id =
                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                    &seg_ctx, f->cur_segmap, f->b4_stride);
            if (b->skip) {
                b->seg_id = pred_seg_id;
            } else {
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
            }
            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
        }

        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];

        if (DEBUG_BLOCK_INFO)
            printf("Post-segid[postskip;%d]: r=%d\n",
                   b->seg_id, ts->msac.rng);
    }

    // cdef index
    if (!b->skip) {
        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
                                           ((t->by & 16) >> 3) : 0;
        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
            const int v = dav1d_msac_decode_bools(&ts->msac,
                              f->frame_hdr->cdef.n_bits);
            t->cur_sb_cdef_idx_ptr[idx] = v;
            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;

            if (DEBUG_BLOCK_INFO)
                printf("Post-cdef_idx[%d]: r=%d\n",
                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
        }
    }

    // delta-q/lf
    if (!(t->bx & (31 >> !f->seq_hdr->sb128)) &&
        !(t->by & (31 >> !f->seq_hdr->sb128)))
    {
        const int prev_qidx = ts->last_qidx;
        const int have_delta_q = f->frame_hdr->delta.q.present &&
            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);

        uint32_t prev_delta_lf = ts->last_delta_lf.u32;

        if (have_delta_q) {
            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                                          ts->cdf.m.delta_q, 3);
            if (delta_q == 3) {
                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
                          1 + (1 << n_bits);
            }
            if (delta_q) {
                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
            }
            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
            if (have_delta_q && DEBUG_BLOCK_INFO)
                printf("Post-delta_q[%d->%d]: r=%d\n",
                       delta_q, ts->last_qidx, ts->msac.rng);

            if (f->frame_hdr->delta.lf.present) {
                const int n_lfs = f->frame_hdr->delta.lf.multi ?
                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;

                for (int i = 0; i < n_lfs; i++) {
                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
                    if (delta_lf == 3) {
                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
                                   1 + (1 << n_bits);
                    }
                    if (delta_lf) {
                        if (dav1d_msac_decode_bool_equi(&ts->msac))
                            delta_lf = -delta_lf;
                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
                    }
                    ts->last_delta_lf.i8[i] =
                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
                    if (have_delta_q && DEBUG_BLOCK_INFO)
                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
                               ts->msac.rng);
                }
            }
        }
        if (ts->last_qidx == f->frame_hdr->quant.yac) {
            // assign frame-wide q values to this sb
            ts->dq = f->dq;
        } else if (ts->last_qidx != prev_qidx) {
            // find sb-specific quant parameters
            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
            ts->dq = ts->dqmem;
        }
        if (!ts->last_delta_lf.u32) {
            // assign frame-wide lf values to this sb
            ts->lflvl = f->lf.lvl;
        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
            // find sb-specific lf lvl parameters
            ts->lflvl = ts->lflvlmem;
            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
        }
    }

    if (b->skip_mode) {
        b->intra = 0;
    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
        if (seg && (seg->ref >= 0 || seg->globalmv)) {
            b->intra = !seg->ref;
        } else {
            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
                                           have_top, have_left);
            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.intra[ictx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
        }
    } else if (f->frame_hdr->allow_intrabc) {
        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
        if (DEBUG_BLOCK_INFO)
            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
    } else {
        b->intra = 1;
    }

    // intra/inter-specific stuff
    if (b->intra) {
        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
                        [dav1d_intra_mode_context[t->l.mode[by4]]];
        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
                                                     N_INTRA_PRED_MODES - 1);
        if (DEBUG_BLOCK_INFO)
            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);

        // angle delta
        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
            b->y_mode <= VERT_LEFT_PRED)
        {
            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
            b->y_angle = angle - 3;
        } else {
            b->y_angle = 0;
        }

        if (has_chroma) {
            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);

            b->uv_angle = 0;
            if (b->uv_mode == CFL_PRED) {
#define SIGN(a) (!!(a) + ((a) > 0))
                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                     ts->cdf.m.cfl_sign, 7) + 1;
                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
                assert(sign_u == sign / 3);
                if (sign_u) {
                    const int ctx = (sign_u == 2) * 3 + sign_v;
                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
                } else {
                    b->cfl_alpha[0] = 0;
                }
                if (sign_v) {
                    const int ctx = (sign_v == 2) * 3 + sign_u;
                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
                } else {
                    b->cfl_alpha[1] = 0;
                }
#undef SIGN
                if (DEBUG_BLOCK_INFO)
                    printf("Post-uvalphas[%d/%d]: r=%d\n",
                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
                       b->uv_mode <= VERT_LEFT_PRED)
            {
                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
                b->uv_angle = angle - 3;
            }
        }

        b->pal_sz[0] = b->pal_sz[1] = 0;
        if (f->frame_hdr->allow_screen_content_tools &&
            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
        {
            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
            if (b->y_mode == DC_PRED) {
                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
                if (use_y_pal)
                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
            }

            if (has_chroma && b->uv_mode == DC_PRED) {
                const int pal_ctx = b->pal_sz[0] > 0;
                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                           ts->cdf.m.pal_uv[pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
            }
        }

        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
        {
            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.use_filter_intra[bs]);
            if (is_filter) {
                b->y_mode = FILTER_PRED;
                b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                 ts->cdf.m.filter_intra, 4);
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-filterintramode[%d/%d]: r=%d\n",
                       b->y_mode, b->y_angle, ts->msac.rng);
        }

        if (b->pal_sz[0]) {
            uint8_t *pal_idx;
            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
            } else
                pal_idx = t->scratch.pal_idx_y;
            read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
        }

        if (has_chroma && b->pal_sz[1]) {
            uint8_t *pal_idx;
            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
            } else
                pal_idx = t->scratch.pal_idx_uv;
            read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
        }

        const TxfmInfo *t_dim;
        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
            b->tx = b->uvtx = (int) TX_4X4;
            t_dim = &dav1d_txfm_dimensions[TX_4X4];
        } else {
            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
            t_dim = &dav1d_txfm_dimensions[b->tx];
            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
                                imin(t_dim->max, 2));

                while (depth--) {
                    b->tx = t_dim->sub;
                    t_dim = &dav1d_txfm_dimensions[b->tx];
                }
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
        }

        // reconstruction
        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);
        } else {
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
        }

        if (f->frame_hdr->loopfilter.level_y[0] ||
            f->frame_hdr->loopfilter.level_y[1])
        {
            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
                                       (const uint8_t (*)[8][2])
                                       &ts->lflvl[b->seg_id][0][0][0],
                                       t->bx, t->by, f->w4, f->h4, bs,
                                       b->tx, b->uvtx, f->cur.p.layout,
                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
        }
        // update contexts
        const enum IntraPredMode y_mode_nofilt =
            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
        BlockContext *edge = t->a;
        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
            int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
#define set_ctx(rep_macro) \
            rep_macro(edge->tx_intra, off, t_lsz); \
            rep_macro(edge->tx, off, t_lsz); \
            rep_macro(edge->mode, off, y_mode_nofilt); \
            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
            rep_macro(edge->seg_pred, off, seg_pred); \
            rep_macro(edge->skip_mode, off, 0); \
            rep_macro(edge->intra, off, 1); \
            rep_macro(edge->skip, off, b->skip); \
            /* see aomedia bug 2183 for why we use luma coordinates here */ \
            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
            }
            case_set(b_dim[2 + i]);
#undef set_ctx
        }
        if (b->pal_sz[0])
            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
        if (has_chroma) {
            uint8_t uv_mode = b->uv_mode;
            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
            if (b->pal_sz[1])
                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
        }
        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
            splat_intraref(f->c, t, bs, bw4, bh4);
    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
        // intra block copy
        refmvs_candidate mvstack[8];
        int n_mvs, ctx;
        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                          (union refmvs_refpair) { .ref = { 0, -1 }},
                          bs, intra_edge_flags, t->by, t->bx);

        if (mvstack[0].mv.mv[0].n)
            b->mv[0] = mvstack[0].mv.mv[0];
        else if (mvstack[1].mv.mv[0].n)
            b->mv[0] = mvstack[1].mv.mv[0];
        else {
            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
                b->mv[0].y = 0;
                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
            } else {
                b->mv[0].y = -(512 << f->seq_hdr->sb128);
                b->mv[0].x = 0;
            }
        }

        const union mv ref = b->mv[0];
        read_mv_residual(ts, &b->mv[0], -1);

        // clip intrabc motion vector to decoded parts of current tile
        int border_left = ts->tiling.col_start * 4;
        int border_top  = ts->tiling.row_start * 4;
        if (has_chroma) {
            if (bw4 < 2 &&  ss_hor)
                border_left += 4;
            if (bh4 < 2 &&  ss_ver)
                border_top  += 4;
        }
        int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
        int src_top    = t->by * 4 + (b->mv[0].y >> 3);
        int src_right  = src_left + bw4 * 4;
        int src_bottom = src_top  + bh4 * 4;
        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;

        // check against left or right tile boundary and adjust if necessary
        if (src_left < border_left) {
            src_right += border_left - src_left;
            src_left  += border_left - src_left;
        } else if (src_right > border_right) {
            src_left  -= src_right - border_right;
            src_right -= src_right - border_right;
        }
        // check against top tile boundary and adjust if necessary
        if (src_top < border_top) {
            src_bottom += border_top - src_top;
            src_top    += border_top - src_top;
        }

        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
        // check for overlap with current superblock
        if (src_bottom > sby && src_right > sbx) {
            if (src_top - border_top >= src_bottom - sby) {
                // if possible move src up into the previous suberblock row
                src_top    -= src_bottom - sby;
                src_bottom -= src_bottom - sby;
            } else if (src_left - border_left >= src_right - sbx) {
                // if possible move src left into the previous suberblock
                src_left  -= src_right - sbx;
                src_right -= src_right - sbx;
            }
        }
        // move src up if it is below current superblock row
        if (src_bottom > sby + sb_size) {
            src_top    -= src_bottom - (sby + sb_size);
            src_bottom -= src_bottom - (sby + sb_size);
        }
        // error out if mv still overlaps with the current superblock
        if (src_bottom > sby && src_right > sbx)
            return -1;

        b->mv[0].x = (src_left - t->bx * 4) * 8;
        b->mv[0].y = (src_top  - t->by * 4) * 8;

        if (DEBUG_BLOCK_INFO)
            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
        read_vartx_tree(t, b, bs, bx4, by4);

        // reconstruction
        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);
            b->filter2d = FILTER_2D_BILINEAR;
        } else {
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
        }

        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
        BlockContext *edge = t->a;
        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
#define set_ctx(rep_macro) \
            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
            rep_macro(edge->mode, off, DC_PRED); \
            rep_macro(edge->pal_sz, off, 0); \
            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
            rep_macro(t->pal_sz_uv[i], off, 0); \
            rep_macro(edge->seg_pred, off, seg_pred); \
            rep_macro(edge->skip_mode, off, 0); \
            rep_macro(edge->intra, off, 0); \
            rep_macro(edge->skip, off, b->skip)
            case_set(b_dim[2 + i]);
#undef set_ctx
        }
        if (has_chroma) {
            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
        }
    } else {
        // inter-specific mode/mv coding
        int is_comp, has_subpel_filter;

        if (b->skip_mode) {
            is_comp = 1;
        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
        {
            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
                                         have_top, have_left);
            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
                          ts->cdf.m.comp[ctx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
        } else {
            is_comp = 0;
        }

        if (b->skip_mode) {
            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
            b->comp_type = COMP_INTER_AVG;
            b->inter_mode = NEARESTMV_NEARESTMV;
            b->drl_idx = NEAREST_DRL;
            has_subpel_filter = 0;

            refmvs_candidate mvstack[8];
            int n_mvs, ctx;
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                    b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->mv[0] = mvstack[0].mv.mv[0];
            b->mv[1] = mvstack[0].mv.mv[1];
            fix_mv_precision(f->frame_hdr, &b->mv[0]);
            fix_mv_precision(f->frame_hdr, &b->mv[1]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
                       b->ref[0], b->ref[1]);
        } else if (is_comp) {
            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
                                                 have_top, have_left);
            if (dav1d_msac_decode_bool_adapt(&ts->msac,
                    ts->cdf.m.comp_dir[dir_ctx]))
            {
                // bidir - first reference (fw)
                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
                {
                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
                } else {
                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
                }

                // second reference (bw)
                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
                {
                    b->ref[1] = 6;
                } else {
                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
                }
            } else {
                // unidir
                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
                {
                    b->ref[0] = 4;
                    b->ref[1] = 6;
                } else {
                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = 0;
                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
                    if (b->ref[1] == 2) {
                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
                                                               have_top, have_left);
                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
                    }
                }
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-refs[%d/%d]: r=%d\n",
                       b->ref[0], b->ref[1], ts->msac.rng);

            refmvs_candidate mvstack[8];
            int n_mvs, ctx;
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                    b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                ts->cdf.m.comp_inter_mode[ctx],
                                N_COMP_INTER_PRED_MODES - 1);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
                       b->inter_mode, ctx, n_mvs, ts->msac.rng);

            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
            b->drl_idx = NEAREST_DRL;
            if (b->inter_mode == NEWMV_NEWMV) {
                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
                    }
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
                               b->drl_idx, n_mvs, ts->msac.rng);
                }
            } else if (im[0] == NEARMV || im[1] == NEARMV) {
                b->drl_idx = NEARER_DRL;
                if (n_mvs > 2) { // NEAR or NEARISH
                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5

¤ Dauer der Verarbeitung: 0.22 Sekunden (vorverarbeitet) ¤

Normalansicht

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.