/* * Copyright (c) 2020, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Reset speed features that works for the baseline encoding, but // blocks the external partition search. void av1_reset_sf_for_ext_part(AV1_COMP *const cpi) {
cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions = 0;
} #endif// CONFIG_PARTITION_SEARCH_ORDER
#if !CONFIG_REALTIME_ONLY // If input |features| is NULL, write tpl stats to file for each super block. // Otherwise, store tpl stats to |features|. // The tpl stats is computed in the unit of tpl_bsize_1d (16x16). // When writing to text file: // The first row contains super block position, super block size, // tpl unit length, number of units in the super block. // The second row contains the intra prediction cost for each unit. // The third row contains the inter prediction cost for each unit. // The forth row contains the motion compensated dependency cost for each unit. staticvoid collect_tpl_stats_sb(const AV1_COMP *const cpi, const BLOCK_SIZE bsize, constint mi_row, constint mi_col,
aom_partition_features_t *features) { const AV1_COMMON *const cm = &cpi->common;
GF_GROUP *gf_group = &cpi->ppi->gf_group; if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) { return;
}
TplParams *const tpl_data = &cpi->ppi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index];
TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; // If tpl stats is not established, early return if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) { if (features != NULL) features->sb_features.tpl_features.available = 0; return;
}
for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { int offsetr = row; int offsetc = col;
// If there is at least one lossless segment, force the skip for intra // block to be 0, in order to avoid the segment_id to be changed by in // write_segment_id(). if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
cpi->enc_seg.has_lossless_segment)
mbmi->skip_txfm = 0;
// Check to make sure that the adjustments above have not caused the // rd multiplier to be truncated to 0.
x->rdmult = (x->rdmult > 0) ? x->rdmult : 1;
}
// Set up destination pointers.
av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
num_planes);
// Set up limit values for MV components. // Mv beyond the range do not produce new/different prediction block.
av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height,
mi_width, cpi->oxcf.border_in_pixels);
// Set up distance of MB to edge of frame in 1/8th pel units.
assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
cm->mi_params.mi_rows, cm->mi_params.mi_cols);
// Set up source buffers.
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
// required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
xd->tile = *tile;
}
void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
MACROBLOCK *const x, int mi_row, int mi_col,
BLOCK_SIZE bsize) { const AV1_COMMON *const cm = &cpi->common; conststruct segmentation *const seg = &cm->seg;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
/*!\brief Hybrid intra mode search.
 *
 * \ingroup intra_mode_search
 * \callgraph
 * \callergraph
 * This is the top-level function for mode search for intra frames in the
 * non-RD optimized case. Depending on speed feature and block size it calls
 * either non-RD or RD optimized intra mode search.
 *
 * \param[in]    cpi            Top-level encoder structure
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    rd_cost        Struct to keep track of the RD information
 * \param[in]    bsize          Current block size
 * \param[in]    ctx            Structure to hold snapshot of coding context
 *                              during the mode picking process
 *
 * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
 * is modified to store information about the best mode computed
 * in this function. The rd_cost struct is also updated with the RD stats
 * corresponding to the best mode found.
*/
staticinlinevoid hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
RD_STATS *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx) { int use_rdopt = 0; constint hybrid_intra_pickmode = cpi->sf.rt_sf.hybrid_intra_pickmode; // Use rd pick for intra mode search based on block size and variance. if (hybrid_intra_pickmode && bsize < BLOCK_16X16) { unsignedint var_thresh[3] = { 0, 101, 201 };
assert(hybrid_intra_pickmode <= 3); if (x->source_variance >= var_thresh[hybrid_intra_pickmode - 1])
use_rdopt = 1;
}
// For real time/allintra row-mt enabled multi-threaded encoding with cost // update frequency set to COST_UPD_TILE/COST_UPD_OFF, tile ctxt is not updated // at superblock level. Thus, it is not required for the encoding of top-right // superblock be complete for updating tile ctxt. However, when encoding a block // whose right edge is also the superblock edge, intra and inter mode evaluation // (ref mv list population) require the encoding of the top-right superblock to // be complete. So, here, we delay the waiting of threads until the need for the // data from the top-right superblock region. staticinlinevoid wait_for_top_right_sb(AV1EncRowMultiThreadInfo *enc_row_mt,
AV1EncRowMultiThreadSync *row_mt_sync,
TileInfo *tile_info,
BLOCK_SIZE sb_size, int sb_mi_size_log2, BLOCK_SIZE bsize, int mi_row, int mi_col) { constint sb_size_in_mi = mi_size_wide[sb_size]; constint bw_in_mi = mi_size_wide[bsize]; constint blk_row_in_sb = mi_row & (sb_size_in_mi - 1); constint blk_col_in_sb = mi_col & (sb_size_in_mi - 1); constint top_right_block_in_sb =
(blk_row_in_sb == 0) && (blk_col_in_sb + bw_in_mi >= sb_size_in_mi);
// Don't wait if the block is the not the top-right block in the superblock. if (!top_right_block_in_sb) return;
// Wait for the top-right superblock to finish encoding. constint sb_row_in_tile =
(mi_row - tile_info->mi_row_start) >> sb_mi_size_log2; constint sb_col_in_tile =
(mi_col - tile_info->mi_col_start) >> sb_mi_size_log2;
/*!\brief Interface for AV1 mode search for an individual coding block
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Searches prediction modes, transform, and coefficient coding modes for an
 * individual coding block. This function is the top-level interface that
 * directs the encoder to the proper mode search function, among those
 * implemented for inter/intra + rd/non-rd + non-skip segment/skip segment.
 *
 * \param[in]    cpi            Top-level encoder structure
 * \param[in]    tile_data      Pointer to struct holding adaptive
 *                              data/contexts/models for the tile during
 *                              encoding
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    mi_row         Row coordinate of the block in a step size of
 *                              MI_SIZE
 * \param[in]    mi_col         Column coordinate of the block in a step size of
 *                              MI_SIZE
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
 *                              stats for the current block
 * \param[in]    partition      Partition mode of the parent block
 * \param[in]    bsize          Current block size
 * \param[in]    ctx            Pointer to structure holding coding contexts and
 *                              chosen modes for the current block
 * \param[in]    best_rd        Upper bound of rd cost of a valid partition
 *
 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
 * for reconstruction are stored in ctx, and the rate-distortion stats are
 * stored in rd_cost. If no valid mode leads to rd_cost <= best_rd, the status
 * will be signalled by an INT64_MAX rd_cost->rdcost.
*/ staticvoid pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_STATS *rd_cost, PARTITION_TYPE partition,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
RD_STATS best_rd) { if (cpi->sf.part_sf.use_best_rd_for_pruning && best_rd.rdcost < 0) {
ctx->rd_stats.rdcost = INT64_MAX;
ctx->rd_stats.skip_txfm = 0;
av1_invalid_rd_stats(rd_cost); return;
}
// This is only needed for real time/allintra row-mt enabled multi-threaded // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
&tile_data->tile_info, cm->seq_params->sb_size,
cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
// Sets up the tx_type_map buffer in MACROBLOCKD.
xd->tx_type_map = txfm_info->tx_type_map_;
xd->tx_type_map_stride = mi_size_wide[bsize];
for (i = 0; i < num_planes; ++i) {
p[i].coeff = ctx->coeff[i];
p[i].qcoeff = ctx->qcoeff[i];
p[i].dqcoeff = ctx->dqcoeff[i];
p[i].eobs = ctx->eobs[i];
p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
}
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
ctx->skippable = 0; // Set to zero to make sure we do not use the previous encoded frame stats
mbmi->skip_txfm = 0; // Reset skip mode flag.
mbmi->skip_mode = 0;
// Save rdmult before it might be changed, so it can be restored later. constint orig_rdmult = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi); // Set error per bit for current rdmult
av1_set_error_per_bit(&x->errorperbit, x->rdmult);
av1_rd_cost_update(x->rdmult, &best_rd);
// If set best_rd.rdcost to INT64_MAX, the encoder will not use any previous // rdcost information for the following mode search. // Disabling the feature could get some coding gain, with encoder slowdown. if (!cpi->sf.part_sf.use_best_rd_for_pruning) {
av1_invalid_rd_stats(&best_rd);
}
// Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (frame_is_intra_only(cm)) { #if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, av1_rd_pick_intra_mode_sb_time); #endif
av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost); #if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_rd_pick_intra_mode_sb_time); #endif
} else { #if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, av1_rd_pick_inter_mode_sb_time); #endif if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
rd_cost, bsize, ctx, best_rd.rdcost);
} else {
av1_rd_pick_inter_mode(cpi, tile_data, x, rd_cost, bsize, ctx,
best_rd.rdcost);
} #if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_rd_pick_inter_mode_sb_time); #endif
}
// Examine the resulting rate and for AQ mode 2 make a segment choice. if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
bsize >= BLOCK_16X16) {
av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
x->rdmult = orig_rdmult;
// TODO(jingning) The rate-distortion optimization flow needs to be // refactored to provide proper exit/return handle. if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
if (!seg_ref_active) { #if CONFIG_ENTROPY_STATS
counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++; #endif
update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
inter_block, 2); // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. if (inter_block) { const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0]; const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1]; if (current_frame->reference_mode == REFERENCE_MODE_SELECT) { if (is_comp_ref_allowed(bsize)) { #if CONFIG_ENTROPY_STATS
counts->comp_inter[av1_get_reference_mode_context(xd)]
[has_second_ref(mbmi)]++; #endif// CONFIG_ENTROPY_STATS
update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
}
}
/*!\brief Reconstructs an individual coding block * * \ingroup partition_search * Reconstructs an individual coding block by applying the chosen modes stored * in ctx, also updates mode counts and entropy models. * * \param[in] cpi Top-level encoder structure * \param[in] tile_data Pointer to struct holding adaptive * data/contexts/models for the tile during encoding * \param[in] td Pointer to thread data * \param[in] tp Pointer to the starting token * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE * \param[in] mi_col Column coordinate of the block in a step size of * MI_SIZE * \param[in] dry_run A code indicating whether it is part of the final * pass for reconstructing the superblock * \param[in] bsize Current block size * \param[in] partition Partition mode of the parent block * \param[in] ctx Pointer to structure holding coding contexts and the * chosen modes for the current block * \param[in] rate Pointer to the total rate for the current block * * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters) * will be updated in the pixel buffers in td->mb.e_mbd. Also, the chosen modes * will be stored in the MB_MODE_INFO buffer td->mb.e_mbd.mi[0].
*/ staticvoid encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
ThreadData *td, TokenExtra **tp, int mi_row, int mi_col,
RUN_TYPE dry_run, BLOCK_SIZE bsize,
PARTITION_TYPE partition, PICK_MODE_CONTEXT *const ctx, int *rate) { const AV1_COMMON *const cm = &cpi->common;
TileInfo *const tile = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd; constint subsampling_x = cm->seq_params->subsampling_x; constint subsampling_y = cm->seq_params->subsampling_y;
// delta quant applies to both intra and inter constint super_block_upper_left =
((mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
((mi_col & (cm->seq_params->mib_size - 1)) == 0); const DeltaQInfo *const delta_q_info = &cm->delta_q_info; if (delta_q_info->delta_q_present_flag &&
(bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
super_block_upper_left) {
xd->current_base_qindex = mbmi->current_qindex; if (delta_q_info->delta_lf_present_flag) { if (delta_q_info->delta_lf_multi) { constint frame_lf_count =
av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
}
} else {
xd->delta_lf_from_base = mbmi->delta_lf_from_base;
}
}
}
RD_COUNTS *rdc = &td->rd_counts; if (mbmi->skip_mode) {
assert(!frame_is_intra_only(cm));
rdc->skip_mode_used_flag = 1; if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
assert(has_second_ref(mbmi));
rdc->compound_ref_used_flag = 1;
}
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
} else { constint seg_ref_active =
segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. if (is_inter_block(mbmi)) {
av1_collect_neighbors_ref_counts(xd); if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { if (has_second_ref(mbmi)) { // This flag is also updated for 4x4 blocks
rdc->compound_ref_used_flag = 1;
}
}
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
}
}
}
if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
if (mbmi->ref_frame[1] != INTRA_FRAME) { if (motion_allowed >= OBMC_CAUSAL) {
td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
} if (motion_allowed == WARPED_CAUSAL) {
td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
}
}
}
}
} // TODO(Ravi/Remya): Move this copy function to a better logical place // This function will copy the best mode information from block // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during // bitstream preparation.
av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
av1_ref_frame_type(xd->mi[0]->ref_frame));
x->rdmult = origin_mult;
}
/*!\brief Reconstructs a partition (may contain multiple coding blocks) * * \ingroup partition_search * Reconstructs a sub-partition of the superblock by applying the chosen modes * and partition trees stored in pc_tree. * * \param[in] cpi Top-level encoder structure * \param[in] td Pointer to thread data * \param[in] tile_data Pointer to struct holding adaptive * data/contexts/models for the tile during encoding * \param[in] tp Pointer to the starting token * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE * \param[in] mi_col Column coordinate of the block in a step size of * MI_SIZE * \param[in] dry_run A code indicating whether it is part of the final * pass for reconstructing the superblock * \param[in] bsize Current block size * \param[in] pc_tree Pointer to the PC_TREE node storing the picked * partitions and mode info for the current block * \param[in] rate Pointer to the total rate for the current block * * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters) * will be updated in the pixel buffers in td->mb.e_mbd.
*/ staticvoid encode_sb(const AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TokenExtra **tp, int mi_row, int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
PC_TREE *pc_tree, int *rate) {
assert(bsize < BLOCK_SIZES_ALL); const AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
assert(bsize < BLOCK_SIZES_ALL); constint hbs = mi_size_wide[bsize] / 2; constint is_partition_root = bsize >= BLOCK_8X8; constint ctx = is_partition_root
? partition_plane_context(xd, mi_row, mi_col, bsize)
: -1; const PARTITION_TYPE partition = pc_tree->partitioning; const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); #if !CONFIG_REALTIME_ONLY int quarter_step = mi_size_wide[bsize] / 4; int i;
BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); #endif
if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; if (subsize == BLOCK_INVALID) return;
/*!\brief AV1 block partition search (partition estimation and partial search). * * \ingroup partition_search * Encode the block by applying pre-calculated partition patterns that are * represented by coding block sizes stored in the mbmi array. Minor partition * adjustments are tested and applied if they lead to lower rd costs. The * partition types are limited to a basic set: none, horz, vert, and split. * * \param[in] cpi Top-level encoder structure * \param[in] td Pointer to thread data * \param[in] tile_data Pointer to struct holding adaptive data/contexts/models for the tile during encoding * \param[in] mib Array representing MB_MODE_INFO pointers for mi blocks starting from the first pixel of the current block * \param[in] tp Pointer to the starting token * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE * \param[in] mi_col Column coordinate of the block in a step size of MI_SIZE * \param[in] bsize Current block size * \param[in] rate Pointer to the final rate for encoding the current block * \param[in] dist Pointer to the final distortion of the current block * \param[in] do_recon Whether the reconstruction function needs to be run, either for finalizing a superblock or providing reference for future sub-partitions * \param[in] pc_tree Pointer to the PC_TREE node holding the picked partitions and mode info for the current block * * \remark Nothing is returned. The pc_tree struct is modified to store the * picked partition and modes. The rate and dist are also updated with those * corresponding to the best partition found.
*/ void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
MB_MODE_INFO **mib, TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate,
int64_t *dist, int do_recon, PC_TREE *pc_tree) {
AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; constint num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd; const ModeCosts *mode_costs = &x->mode_costs; constint bs = mi_size_wide[bsize]; constint hbs = bs / 2; constint pl = (bsize >= BLOCK_8X8)
? partition_plane_context(xd, mi_row, mi_col, bsize)
: 0; const PARTITION_TYPE partition =
(bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
: PARTITION_NONE; const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc;
BLOCK_SIZE bs_type = mib[0]->bsize; int use_partition_none = 0;
x->try_merge_partition = 0;
if (pc_tree->none == NULL) {
pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); if (!pc_tree->none)
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
}
PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
assert(mi_size_wide[bsize] == mi_size_high[bsize]); // In rt mode, currently the min partition size is BLOCK_8X8.
assert(bsize >= cpi->sf.part_sf.default_min_partition_size);
// Save rdmult before it might be changed, so it can be restored later. constint orig_rdmult = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
// If last_part is better set the partitioning to that. if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
mib[0]->bsize = bs_type; if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
chosen_rdc = last_part_rdc;
} // If none was better set the partitioning to that. if (none_rdc.rdcost < INT64_MAX &&
none_rdc.rdcost - (none_rdc.rdcost >> 9) < chosen_rdc.rdcost) {
mib[0]->bsize = bsize; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
chosen_rdc = none_rdc;
}
// We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. if (bsize == cm->seq_params->sb_size)
assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, encode_sb_time); #endif if (do_recon) { if (bsize == cm->seq_params->sb_size) { // NOTE: To get estimate for rate due to the tokens, use: // int rate_coeffs = 0; // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, // bsize, pc_tree, &rate_coeffs);
set_cb_offsets(x->cb_offset, 0, 0);
encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
pc_tree, NULL);
} else {
encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
} #if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, encode_sb_time); #endif
encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate); if (!dry_run) {
update_cb_offsets(x, bsize, subsampling_x, subsampling_y); if (has_second_ref(mbmi)) { if (mbmi->compound_idx == 0 ||
mbmi->interinter_comp.type == COMPOUND_AVERAGE)
mbmi->comp_group_idx = 0; else
mbmi->comp_group_idx = 1;
mbmi->compound_idx = 1;
}
RD_COUNTS *const rdc = &td->rd_counts; if (mbmi->skip_mode) {
assert(!frame_is_intra_only(cm));
rdc->skip_mode_used_flag = 1; if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
has_second_ref(mbmi)) {
rdc->compound_ref_used_flag = 1;
}
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
} else { constint seg_ref_active =
segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. if (is_inter_block(mbmi)) {
av1_collect_neighbors_ref_counts(xd); if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
has_second_ref(mbmi)) { // This flag is also updated for 4x4 blocks
rdc->compound_ref_used_flag = 1;
}
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
}
}
} if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY &&
(mbmi->mode == NEWMV || mbmi->mode < INTRA_MODE_END)) {
int32_t blocks = mi_size_high[bsize] * mi_size_wide[bsize];
rdc->newmv_or_intra_blocks += blocks;
} if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
} if ((cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ ||
cpi->active_map.enabled) &&
mbmi->skip_txfm && !cpi->rc.rtc_external_ratectrl && cm->seg.enabled)
av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize, dry_run); // TODO(Ravi/Remya): Move this copy function to a better logical place // This function will copy the best mode information from block // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during // bitstream preparation.
av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
av1_ref_frame_type(xd->mi[0]->ref_frame));
x->rdmult = origin_mult; #if CONFIG_COLLECT_COMPONENT_TIMING
end_timing((AV1_COMP *)cpi, encode_b_nonrd_time); #endif
}
staticint get_force_zeromv_skip_flag_for_blk(const AV1_COMP *cpi, const MACROBLOCK *x,
BLOCK_SIZE bsize) { // Force zero MV skip based on SB level decision if (x->force_zeromv_skip_for_sb < 2) return x->force_zeromv_skip_for_sb;
// For blocks of size equal to superblock size, the decision would have been // already done at superblock level. Hence zeromv-skip decision is skipped. const AV1_COMMON *const cm = &cpi->common; if (bsize == cm->seq_params->sb_size) return 0;
/*!\brief Top level function to pick block mode for non-RD optimized case
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Searches prediction modes, transform, and coefficient coding modes for an
 * individual coding block. This function is the top-level function that is
 * used for non-RD optimized mode search (controlled by
 * \c cpi->sf.rt_sf.use_nonrd_pick_mode). Depending on frame type it calls
 * inter/skip/hybrid-intra mode search functions.
 *
 * \param[in]    cpi            Top-level encoder structure
 * \param[in]    tile_data      Pointer to struct holding adaptive
 *                              data/contexts/models for the tile during
 *                              encoding
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    mi_row         Row coordinate of the block in a step size of
 *                              MI_SIZE
 * \param[in]    mi_col         Column coordinate of the block in a step size of
 *                              MI_SIZE
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
 *                              stats for the current block
 * \param[in]    bsize          Current block size
 * \param[in]    ctx            Pointer to structure holding coding contexts and
 *                              chosen modes for the current block
 *
 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
 * for reconstruction are stored in ctx, and the rate-distortion stats are
 * stored in rd_cost and copied into ctx->rd_stats. Unlike pick_sb_modes(), this
 * function takes no best_rd bound. x->rdmult is restored to its entry value
 * before returning.
 */
static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  // For nonrd mode, av1_set_offsets is already called at the superblock level
  // in encode_nonrd_sb when we determine the partitioning.
  if (bsize != cpi->common.seq_params->sb_size ||
      cpi->sf.rt_sf.nonrd_check_partition_split == 1) {
    av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
  }
  // Whichever path set the offsets, they must describe exactly this block.
  assert(x->last_set_offsets_loc.mi_row == mi_row &&
         x->last_set_offsets_loc.mi_col == mi_col &&
         x->last_set_offsets_loc.bsize == bsize);

  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int seg_skip =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);

  // This is only needed for real time/allintra row-mt enabled multi-threaded
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
                        &tile_data->tile_info, cm->seq_params->sb_size,
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, pick_sb_modes_nonrd_time);
#endif
  // Sets up the tx_type_map buffer in MACROBLOCKD.
  xd->tx_type_map = txfm_info->tx_type_map_;
  xd->tx_type_map_stride = mi_size_wide[bsize];

  // Point the per-plane coefficient buffers at the storage owned by ctx.
  for (int i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    p[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (int i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  if (!seg_skip) {
    x->force_zeromv_skip_for_blk =
        get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);

    // Source variance may be already computed at superblock level, so no need
    // to recompute, unless bsize < sb_size or source_variance is not yet set.
    if (!x->force_zeromv_skip_for_blk &&
        (x->source_variance == UINT_MAX || bsize < cm->seq_params->sb_size))
      x->source_variance = av1_get_perpixel_variance_facade(
          cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
  }

  // Save rdmult before it might be changed, so it can be restored later.
  const int orig_rdmult = x->rdmult;
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
  // Set error per bit for current rdmult
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, hybrid_intra_mode_search_time);
#endif
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, hybrid_intra_mode_search_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, nonrd_pick_inter_mode_sb_time);
#endif
    if (seg_skip) {
      x->force_zeromv_skip_for_blk = 1;
      // TODO(marpan): Consider adding a function for nonrd:
      // av1_nonrd_pick_inter_mode_sb_seg_skip(), instead of setting
      // x->force_zeromv_skip flag and entering av1_nonrd_pick_inter_mode_sb().
    }
    av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, nonrd_pick_inter_mode_sb_time);
#endif
  }

  if (cpi->sf.rt_sf.skip_cdef_sb) {
    // cdef_strength is initialized to 1 which means skip_cdef, and is updated
    // here. Check to see if skipping cdef is allowed. Never skip on slide/scene
    // change, near a key frame, or when color sensitivity is set. Always allow
    // cdef_skip for seg_skip = 1.
    const int allow_cdef_skipping =
        seg_skip ||
        (cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad &&
         !(x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] ||
           x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)]));

    // Find the corresponding 64x64 block. It'll be the 128x128 block if that's
    // the block size.
    const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64;
    const int mi_col_sb = mi_col - mi_col % MI_SIZE_64X64;
    MB_MODE_INFO **mi_sb =
        cm->mi_params.mi_grid_base +
        get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb);
    const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
    // UINT_MAX makes the spatial-variance test below always true, i.e. the
    // variance condition only has an effect when the threshold is 400.
    const unsigned int thresh_spatial_var =
        (cpi->oxcf.speed >= 11 && !is_720p_or_larger &&
         cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN)
            ? 400
            : UINT_MAX;

    // For skip_cdef_sb = 1: do not skip if allow_cdef_skipping is false or
    // intra or new mv is picked, with possible condition on spatial variance.
    // For skip_cdef_sb >= 2: more aggressive mode to always skip unless
    // allow_cdef_skipping is false and source_variance is non-zero.
    if (cpi->sf.rt_sf.skip_cdef_sb >= 2) {
      mi_sb[0]->cdef_strength =
          mi_sb[0]->cdef_strength &&
          (allow_cdef_skipping || x->source_variance == 0);
    } else {
      mi_sb[0]->cdef_strength =
          mi_sb[0]->cdef_strength && allow_cdef_skipping &&
          !(x->source_variance < thresh_spatial_var &&
            (mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV));
    }
    // Store in the pickmode context.
    ctx->mic.cdef_strength = mi_sb[0]->cdef_strength;
  }

  x->rdmult = orig_rdmult;

  // Mirror the resulting RD stats into the pick-mode context.
  ctx->rd_stats.rate = rd_cost->rate;
  ctx->rd_stats.dist = rd_cost->dist;
  ctx->rd_stats.rdcost = rd_cost->rdcost;
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, pick_sb_modes_nonrd_time);
#endif
}
// When model based skip is not used (i.e.,use_model_yrd_large = 0), skip_txfm // would have been populated based on Hadamard transform and skip_txfm flag is // more reliable. Hence SPLIT evaluation is disabled at all quantizers for 8x8 // and 16x16 blocks. // When model based skip is used (i.e.,use_model_yrd_large = 1), skip_txfm may // not be reliable. Hence SPLIT evaluation is disabled only at lower // quantizers for blocks >= 32x32. if ((!use_model_yrd_large) || (!is_larger_qindex)) returnfalse;
// Use residual statistics to decide if SPLIT partition should be evaluated // for 32x32 blocks. The pruning logic is avoided for larger block size to // avoid the visual artifacts if (pc_tree->none->mic.mode == NEWMV && bsize == BLOCK_32X32 && do_split) { const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
assert(subsize < BLOCK_SIZES_ALL); double min_per_pixel_error = DBL_MAX; double max_per_pixel_error = 0.; int i; for (i = 0; i < SUB_PARTITIONS_SPLIT; i++) { constint x_idx = (i & 1) * hbs; constint y_idx = (i >> 1) * hbs; if ((mi_row + y_idx >= mi_params->mi_rows) ||
(mi_col + x_idx >= mi_params->mi_cols)) { break;
}
// Populate the appropriate buffer pointers. // Pass scale factors as NULL as the base pointer of the block would have // been calculated appropriately. struct buf_2d src_split_buf_2d, pred_split_buf_2d; conststruct buf_2d *src_none_buf_2d = &x->plane[AOM_PLANE_Y].src;
setup_pred_plane(&src_split_buf_2d, subsize, src_none_buf_2d->buf,
src_none_buf_2d->width, src_none_buf_2d->height,
src_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0); conststruct buf_2d *pred_none_buf_2d = &xd->plane[AOM_PLANE_Y].dst;
setup_pred_plane(&pred_split_buf_2d, subsize, pred_none_buf_2d->buf,
pred_none_buf_2d->width, pred_none_buf_2d->height,
pred_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0);
// Prune based on residual statistics only if all the sub-partitions are // valid. if (i == SUB_PARTITIONS_SPLIT) { if (max_per_pixel_error - min_per_pixel_error <= 1.5) do_split = false;
}
}
if (none_rdc.rdcost < split_rdc.rdcost) { /* Predicted samples can not be reused for PARTITION_NONE since same * buffer is being used to store the reconstructed samples of
* PARTITION_SPLIT block. */ if (do_split) x->reuse_inter_pred = false;
mib[0]->bsize = bsize;
pc_tree->partitioning = PARTITION_NONE;
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
pc_tree->none, NULL);
} else {
mib[0]->bsize = subsize;
pc_tree->partitioning = PARTITION_SPLIT; /* Predicted samples can not be reused for PARTITION_SPLIT since same
* buffer is being used to write the reconstructed samples. */ // TODO(Cherma): Store and reuse predicted samples generated by // encode_b_nonrd() in DRY_RUN_NORMAL mode.
x->reuse_inter_pred = false;
for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { int x_idx = (i & 1) * hbs; int y_idx = (i >> 1) * hbs; if ((mi_row + y_idx >= mi_params->mi_rows) ||
(mi_col + x_idx >= mi_params->mi_cols)) continue;
// Note: We don't reset pc_tree->split[i]->none here because it // could contain results from the additional check. Instead, it is // reset before we enter the nonrd_check_partition_merge_mode // condition. if (!pc_tree->split[i]->none) {
pc_tree->split[i]->none =
av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); if (!pc_tree->split[i]->none)
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
}
encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
}
}
}
// direct_partition_merging(): tries to replace the current partitioning of the
// bsize block at (mi_row, mi_col) by one merged block, without an RD search.
// Visible steps: derive the partition type and sub-block size, bail out if any
// of b0..b3 is split further than subsize, set up block offsets and per-block
// state, and finally point every mode-info grid entry of the block at
// this_mi[0].
// NOTE(review): the declarations of b0..b3 and this_mi, and the `if` that the
// second-to-last closing brace belongs to, are not visible in this view of the
// function - confirm against the full file before relying on these notes.
// Evaluate if the sub-partitions can be merged directly into a large partition // without calculating the RD cost. staticvoid direct_partition_merging(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, MB_MODE_INFO **mib, int mi_row, int mi_col, BLOCK_SIZE bsize) {
AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd; constint bs = mi_size_wide[bsize]; constint hbs = bs / 2; const PARTITION_TYPE partition =
(bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
: PARTITION_NONE;
BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
// Merging is abandoned as soon as any of b0..b3 reports a block size smaller
// than subsize.
// Check if the following conditions are met. This can be updated // later with more support added. constint further_split = b0[0]->bsize < subsize || b1[0]->bsize < subsize ||
b2[0]->bsize < subsize || b3[0]->bsize < subsize; if (further_split) return;
// TODO(yunqing): functions called below can be optimized by // removing unrelated operations.
av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
mi_col, bsize);
const MV_REFERENCE_FRAME ref_frame = this_mi[0]->ref_frame[0];
int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES]; struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; int force_skip_low_temp_var = 0; int skip_pred_mv = 0; bool use_scaled_ref;
// Mark every (mode, reference frame) motion vector slot as invalid.
for (int i = 0; i < MB_MODE_COUNT; ++i) { for (int j = 0; j < REF_FRAMES; ++j) {
frame_mv[i][j].as_int = INVALID_MV;
}
}
// Presumably copies color_sensitivity_sb into color_sensitivity (av1_copy is
// defined elsewhere) - TODO confirm argument order.
av1_copy(x->color_sensitivity, x->color_sensitivity_sb);
// skip_pred_mv is set only when nonrd_prune_ref_frame_search > 2 and neither
// chroma plane has a color sensitivity value of 2.
skip_pred_mv = (x->nonrd_prune_ref_frame_search > 2 &&
x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] != 2 &&
x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)] != 2);
// Every entry of the bs x bs area of the mode-info grid (row pitch
// mi_params->mi_stride) is made to alias this_mi[0].
// Update mi for this partition block. for (int y = 0; y < bs; y++) { for (int x_idx = 0; x_idx < bs; x_idx++) {
this_mi[x_idx + y * mi_params->mi_stride] = this_mi[0];
}
}
}
}
// Reading guide for av1_nonrd_use_partition(): the function looks up the
// partition type of the current block, then for each supported type (NONE,
// VERT, HORZ, SPLIT) makes sure the required context nodes exist, picks modes
// with pick_sb_modes_nonrd() and encodes with encode_b_nonrd(). SPLIT either
// goes through try_merge() or recurses into the four quadrants.
/*!\brief AV1 block partition application (minimal RD search). * * \ingroup partition_search * \callgraph * \callergraph * Encode the block by applying pre-calculated partition patterns that are * represented by coding block sizes stored in the mbmi array. The only * partition adjustment allowed is merging leaf split nodes if it leads to a * lower rd cost. The partition types are limited to a basic set: none, horz, * vert, and split. This function is only used in the real-time mode. * * \param[in] cpi Top-level encoder structure * \param[in] td Pointer to thread data * \param[in] tile_data Pointer to struct holding adaptive data/contexts/models for the tile during encoding * \param[in] mib Array representing MB_MODE_INFO pointers for mi blocks starting from the first pixel of the current block * \param[in] tp Pointer to the starting token * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE * \param[in] mi_col Column coordinate of the block in a step size of MI_SIZE * \param[in] bsize Current block size * \param[in] pc_tree Pointer to the PC_TREE node holding the picked partitions and mode info for the current block * * \remark Nothing is returned. The pc_tree struct is modified to store the * picked partition and modes.
*/ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, MB_MODE_INFO **mib,
TokenExtra **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, PC_TREE *pc_tree) {
AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd; const ModeCosts *mode_costs = &x->mode_costs; // Only square blocks from 8x8 to 128x128 are supported
assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128); constint bs = mi_size_wide[bsize]; constint hbs = bs / 2;
// The bsize >= BLOCK_8X8 guards below repeat the condition asserted above, so
// the PARTITION_NONE / 0 alternatives are only fallbacks.
PARTITION_TYPE partition = (bsize >= BLOCK_8X8)
? get_partition(cm, mi_row, mi_col, bsize)
: PARTITION_NONE;
BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
assert(subsize <= BLOCK_LARGEST); constint pl = (bsize >= BLOCK_8X8)
? partition_plane_context(xd, mi_row, mi_col, bsize)
: 0;
// Each case first allocates a missing context node (raising
// AOM_CODEC_MEM_ERROR on failure) or resets an existing one, then picks modes
// and encodes.
switch (partition) { case PARTITION_NONE: if (!pc_tree->none) {
pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); if (!pc_tree->none)
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
} else {
av1_reset_pmc(pc_tree->none);
}
// NOTE(review): dummy_cost is passed below but its declaration is not visible
// in this view of the function.
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost, bsize,
pc_tree->none);
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
partition, pc_tree->none, NULL); break; case PARTITION_VERT: for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { if (!pc_tree->vertical[i]) {
pc_tree->vertical[i] =
av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); if (!pc_tree->vertical[i])
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
} else {
av1_reset_pmc(pc_tree->vertical[i]);
}
}
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
subsize, pc_tree->vertical[0]);
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
PARTITION_VERT, pc_tree->vertical[0], NULL); if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
// Second vertical half: handled only when its start column lies below
// mi_params->mi_cols and the parent block is larger than 8x8.
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col + hbs,
&dummy_cost, subsize, pc_tree->vertical[1]);
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
PARTITION_VERT, pc_tree->vertical[1], NULL);
} break; case PARTITION_HORZ: for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { if (!pc_tree->horizontal[i]) {
pc_tree->horizontal[i] =
av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); if (!pc_tree->horizontal[i])
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
} else {
av1_reset_pmc(pc_tree->horizontal[i]);
}
}
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
subsize, pc_tree->horizontal[0]);
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
PARTITION_HORZ, pc_tree->horizontal[0], NULL);
if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
// Second horizontal half: handled only when its start row lies below
// mi_params->mi_rows and the parent block is larger than 8x8.
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + hbs, mi_col,
&dummy_cost, subsize, pc_tree->horizontal[1]);
encode_b_nonrd(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
PARTITION_HORZ, pc_tree->horizontal[1], NULL);
} break; case PARTITION_SPLIT: for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { if (!pc_tree->split[i]) {
pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); if (!pc_tree->split[i])
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PC_TREE");
}
pc_tree->split[i]->index = i;
} if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
av1_is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
!frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
// Merge check path: taken only with the speed feature on, for a leaf split on
// a non-intra-only frame, and for blocks up to 64x64.
try_merge(cpi, td, tile_data, mib, tp, mi_row, mi_col, bsize, pc_tree,
partition, subsize, pl);
} else { for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { int x_idx = (i & 1) * hbs; int y_idx = (i >> 1) * hbs; int jj = i >> 1, ii = i & 0x01; if ((mi_row + y_idx >= mi_params->mi_rows) ||
(mi_col + x_idx >= mi_params->mi_cols)) continue;
// Recurse into the quadrant; mib is advanced by jj * hbs rows (of mi_stride
// entries) and ii * hbs columns so it points at the quadrant's first entry.
av1_nonrd_use_partition(
cpi, td, tile_data,
mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
}
// Direct merging is attempted only on a non-intra-only frame, with
// allow_update_cdf off, the partition_direct_merging speed feature on, a NONE
// partition cost below the SPLIT cost, and the block fully inside
// mi_rows x mi_cols.
// NOTE(review): change_none_to_split is tested below but its declaration is
// not visible in this view of the function.
if (!change_none_to_split) { // Note: Palette, cfl are not supported. if (!frame_is_intra_only(cm) && !tile_data->allow_update_cdf &&
cpi->sf.rt_sf.partition_direct_merging &&
mode_costs->partition_cost[pl][PARTITION_NONE] <
mode_costs->partition_cost[pl][PARTITION_SPLIT] &&
(mi_row + bs <= mi_params->mi_rows) &&
(mi_col + bs <= mi_params->mi_cols)) {
direct_partition_merging(cpi, td, tile_data, mib, mi_row, mi_col,
bsize);
}
}
} break; case PARTITION_VERT_A: case PARTITION_VERT_B: case PARTITION_HORZ_A: case PARTITION_HORZ_B: case PARTITION_HORZ_4: case PARTITION_VERT_4:
// Extended partition types are rejected by the assert; there is no break, so
// control then falls through into the default case.
assert(0 && "Cannot handle extended partition types"); default: assert(0); break;
}
}
#if !CONFIG_REALTIME_ONLY // Try searching for an encoding for the given subblock. Returns zero if the // rdcost is already too high (to tell the caller not to bother searching for // encodings of further subblocks). staticint rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TokenExtra **tp, int is_last, int mi_row, int mi_col, BLOCK_SIZE subsize,
RD_STATS best_rdcost, RD_STATS *sum_rdc,
PARTITION_TYPE partition,
PICK_MODE_CONTEXT *this_ctx) {
MACROBLOCK *const x = &td->mb; constint orig_mult = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL);
// Initialize HORZ and VERT win flags as true for all split partitions. for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
part_search_state->split_part_rect_win[i].rect_part_win[HORZ] = true;
part_search_state->split_part_rect_win[i].rect_part_win[VERT] = true;
}
// Initialize the rd cost.
av1_init_rd_stats(&part_search_state->this_rdc);
// Initialize RD costs for partition types to 0.
part_search_state->none_rd = 0;
av1_zero(part_search_state->split_rd);
av1_zero(part_search_state->rect_part_rd);
// Initialize SPLIT partition to be not ready.
av1_zero(part_search_state->is_split_ctx_is_ready); // Initialize HORZ and VERT partitions to be not ready.
av1_zero(part_search_state->is_rect_ctx_is_ready);
// Reset the flag indicating whether a partition leading to a rdcost lower // than the bound best_rdc has been found.
part_search_state->found_best_partition = false;
// mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][0]: mi_row postion of // HORZ and VERT partition types. // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][1]: mi_col postion of // HORZ and VERT partition types. constint mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][2] = {
{ { blk_params.mi_row, blk_params.mi_col },
{ blk_params.mi_row_edge, blk_params.mi_col } },
{ { blk_params.mi_row, blk_params.mi_col },
{ blk_params.mi_row, blk_params.mi_col_edge } }
};
// Initialize active edge_type function pointer // for HOZR and VERT partition types.
active_edge_info active_edge_type[NUM_RECT_PARTS] = { av1_active_h_edge,
av1_active_v_edge };
// Indicates edge blocks for HORZ and VERT partition types. constint is_not_edge_block[NUM_RECT_PARTS] = { blk_params.has_rows,
blk_params.has_cols };
// Initialize pc tree context for HORZ and VERT partition types.
PICK_MODE_CONTEXT **cur_ctx[NUM_RECT_PARTS][SUB_PARTITIONS_RECT] = {
{ &pc_tree->horizontal[0], &pc_tree->horizontal[1] },
{ &pc_tree->vertical[0], &pc_tree->vertical[1] }
};
// Loop over rectangular partition types. for (RECT_PART_TYPE i = start_type; i <= end_type; i++) {
assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
!part_search_state->partition_rect_allowed[i]));
// Check if the HORZ / VERT partition search is to be performed. if (!is_rect_part_allowed(cpi, part_search_state, active_edge_type, i,
mi_pos_rect[i][0][i])) continue;
// First sub-partition evaluation in HORZ / VERT partition type.
rd_pick_rect_partition(
cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
best_rdc, 0, mi_pos_rect[i][sub_part_idx][0],
mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
// Start of second sub-partition evaluation. // Evaluate second sub-partition if the first sub-partition cost // is less than the best cost and if it is not an edge block. if (sum_rdc->rdcost < best_rdc->rdcost && is_not_edge_block[i]) { const MB_MODE_INFO *const mbmi = &cur_ctx[i][sub_part_idx][0]->mic; const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; // Neither palette mode nor cfl predicted. if (pmi->palette_size[PLANE_TYPE_Y] == 0 &&
pmi->palette_size[PLANE_TYPE_UV] == 0) { if (mbmi->uv_mode != UV_CFL_PRED)
part_search_state->is_rect_ctx_is_ready[i] = 1;
}
av1_update_state(cpi, td, cur_ctx[i][sub_part_idx][0], blk_params.mi_row,
blk_params.mi_col, blk_params.subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL,
blk_params.subsize, NULL);
// Second sub-partition evaluation in HORZ / VERT partition type.
sub_part_idx = 1;
rd_pick_rect_partition(
cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
best_rdc, 1, mi_pos_rect[i][sub_part_idx][0],
mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
} // Update HORZ / VERT best partition. if (sum_rdc->rdcost < best_rdc->rdcost) {
sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, sum_rdc->dist); if (sum_rdc->rdcost < best_rdc->rdcost) {
*best_rdc = *sum_rdc;
part_search_state->found_best_partition = true;
pc_tree->partitioning = partition_type;
}
} else { // Update HORZ / VERT win flag. if (rect_part_win_info != NULL)
rect_part_win_info->rect_part_win[i] = false;
} #if CONFIG_COLLECT_PARTITION_STATS if (part_timing_stats->timer_is_on) {
end_partition_block_timer(part_timing_stats, partition_type,
sum_rdc->rdcost);
} #endif
av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
blk_params.bsize, av1_num_planes(cm));
}
}
// Loop over AB partition types. for (AB_PART_TYPE ab_part_type = start_type; ab_part_type <= end_type;
ab_part_type++) { const PARTITION_TYPE part_type = ab_part_type + PARTITION_HORZ_A;
// Check if the AB partition search is to be performed. if (!ab_partitions_allowed[ab_part_type]) { continue;
}
blk_params.subsize = get_partition_subsize(bsize, part_type); for (int i = 0; i < SUB_PARTITIONS_AB; i++) { // Set AB partition context.
cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf); if (!cur_part_ctxs[ab_part_type][i])
aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT"); // Set mode as not ready.
cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
}
if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab) { // We can copy directly the mode search results if we have already // searched the current block and the contexts match. if (is_ctx_ready[ab_part_type][0]) {
av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
mode_srch_ctx[ab_part_type][0][0]);
cur_part_ctxs[ab_part_type][0]->mic.partition = part_type;
cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1; if (is_ctx_ready[ab_part_type][1]) {
av1_copy_tree_context(cur_part_ctxs[ab_part_type][1],
mode_srch_ctx[ab_part_type][1][0]);
cur_part_ctxs[ab_part_type][1]->mic.partition = part_type;
cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1;
}
}
}
// Even if the contexts don't match, we can still speed up by reusing the // previous prediction mode. const MB_MODE_INFO *mode_cache[3] = { NULL, NULL, NULL }; if (cpi->sf.part_sf.reuse_best_prediction_for_part_ab) {
set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type);
}
// Calculate the total cost and update the best partition.
av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc); if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) {
*best_rdc = part_search_state->sum_rdc;
part_search_state->found_best_partition = true;
pc_tree->partitioning = partition_type;
} #if CONFIG_COLLECT_PARTITION_STATS if (part_timing_stats->timer_is_on) {
end_partition_block_timer(part_timing_stats, partition_type,
part_search_state->sum_rdc.rdcost);
} #endif
av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
blk_params.bsize, av1_num_planes(cm));
}
// Do not evaluate extended partitions if NONE partition is skippable. staticinlineint prune_ext_part_none_skippable(
PICK_MODE_CONTEXT *part_none, int must_find_valid_partition, int skip_non_sq_part_based_on_none, BLOCK_SIZE bsize) { if ((skip_non_sq_part_based_on_none >= 1) && (part_none != NULL)) { if (part_none->skippable && !must_find_valid_partition &&
bsize >= BLOCK_16X16) { return 1;
}
} return 0;
}
// Allow ab partition search staticint allow_ab_partition_search(PartitionSearchState *part_search_state,
PARTITION_SPEED_FEATURES *part_sf,
PARTITION_TYPE curr_best_part, int must_find_valid_partition, int prune_ext_part_state,
int64_t best_rdcost) { const PartitionBlkParams blk_params = part_search_state->part_blk_params; const BLOCK_SIZE bsize = blk_params.bsize;
// Do not prune if there is no valid partition if (best_rdcost == INT64_MAX) return 1;
// Determine bsize threshold to evaluate ab partitions
BLOCK_SIZE ab_bsize_thresh = part_sf->ext_partition_eval_thresh; if (part_sf->ext_part_eval_based_on_cur_best && !must_find_valid_partition &&
!(curr_best_part == PARTITION_HORZ || curr_best_part == PARTITION_VERT))
ab_bsize_thresh = BLOCK_128X128;
// ab partitions are only allowed for square block sizes BLOCK_16X16 or // higher, so ab_bsize_thresh must be large enough to exclude BLOCK_4X4 and // BLOCK_8X8.
assert(ab_bsize_thresh >= BLOCK_8X8);
// Prune 4-way partitions based on the number of horz/vert wins // in the current block and sub-blocks in PARTITION_SPLIT. staticvoid prune_4_partition_using_split_info(
AV1_COMP *const cpi, MACROBLOCK *x, PartitionSearchState *part_search_state, int part4_search_allowed[NUM_PART4_TYPES]) {
PART4_TYPES cur_part[NUM_PART4_TYPES] = { HORZ4, VERT4 }; // Count of child blocks in which HORZ or VERT partition has won int num_child_rect_win[NUM_RECT_PARTS] = { 0, 0 }; // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of // split partiitons. // Conservative pruning for high quantizers. constint num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3);
for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) { if (!(cpi->sf.part_sf.prune_ext_part_using_split_info &&
part4_search_allowed[cur_part[i]])) continue; // Loop over split partitions. // Get rectangular partitions winner info of split partitions. for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; idx++)
num_child_rect_win[i] +=
(part_search_state->split_part_rect_win[idx].rect_part_win[i]) ? 1
: 0; if (num_child_rect_win[i] < num_win_thresh) {
part4_search_allowed[cur_part[i]] = 0;
}
}
}
// 4-way partitions are only allowed for BLOCK_16X16, BLOCK_32X32, and // BLOCK_64X64, so part4_bsize_thresh must be large enough to exclude // BLOCK_4X4 and BLOCK_8X8.
assert(part4_bsize_thresh >= BLOCK_8X8);
// Disable 4-way partition search flags for width less than a multiple of the // minimum partition width. if (blk_params.width < (blk_params.min_partition_size_1d
<< cpi->sf.part_sf.prune_part4_search)) {
part4_search_allowed[HORZ4] = 0;
part4_search_allowed[VERT4] = 0; return;
}
PARTITION_TYPE cur_part[NUM_PART4_TYPES] = { PARTITION_HORZ_4,
PARTITION_VERT_4 }; const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg; // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or // PARTITION_VERT_4 for this block. This is almost the same as // partition4_allowed, except that we don't allow 128x32 or 32x128 // blocks, so we require that bsize is not BLOCK_128X128.
partition4_allowed &=
part_cfg->enable_1to4_partitions && bsize != BLOCK_128X128;
for (PART4_TYPES i = HORZ4; i < NUM_PART4_TYPES; i++) {
part4_search_allowed[i] =
partition4_allowed && part_search_state->partition_rect_allowed[i] &&
get_plane_block_size(get_partition_subsize(bsize, cur_part[i]),
part_search_state->ss_x,
part_search_state->ss_y) != BLOCK_INVALID;
} // Pruning: pruning out 4-way partitions based on the current best partition. if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) {
part4_search_allowed[HORZ4] &= (pc_tree->partitioning == PARTITION_HORZ ||
pc_tree->partitioning == PARTITION_HORZ_A ||
pc_tree->partitioning == PARTITION_HORZ_B ||
pc_tree->partitioning == PARTITION_SPLIT ||
pc_tree->partitioning == PARTITION_NONE);
part4_search_allowed[VERT4] &= (pc_tree->partitioning == PARTITION_VERT ||
pc_tree->partitioning == PARTITION_VERT_A ||
pc_tree->partitioning == PARTITION_VERT_B ||
pc_tree->partitioning == PARTITION_SPLIT ||
pc_tree->partitioning == PARTITION_NONE);
}
// Pruning: pruning out some 4-way partitions using a DNN taking rd costs of // sub-blocks from basic partition types. if (cpi->sf.part_sf.ml_prune_partition && partition4_allowed &&
part_search_state->partition_rect_allowed[HORZ] &&
part_search_state->partition_rect_allowed[VERT]) {
av1_ml_prune_4_partition(cpi, x, pc_tree->partitioning, best_rdc->rdcost,
part_search_state, part4_search_allowed,
pb_source_variance);
}
// Pruning: pruning out 4-way partitions based on the number of horz/vert wins // in the current block and sub-blocks in PARTITION_SPLIT.
prune_4_partition_using_split_info(cpi, x, part_search_state,
part4_search_allowed);
}
// Set params needed for PARTITION_NONE search. staticvoid set_none_partition_params(const AV1_COMP *const cpi, ThreadData *td,
MACROBLOCK *x, PC_TREE *pc_tree,
PartitionSearchState *part_search_state,
RD_STATS *best_remain_rdcost,
RD_STATS *best_rdc, int *pt_cost) {
PartitionBlkParams blk_params = part_search_state->part_blk_params;
RD_STATS partition_rdcost; // Set PARTITION_NONE context. if (pc_tree->none == NULL)
pc_tree->none = av1_alloc_pmc(cpi, blk_params.bsize, &td->shared_coeff_buf); if (!pc_tree->none)
aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PICK_MODE_CONTEXT");
// Set PARTITION_NONE type cost. if (part_search_state->partition_none_allowed) { if (blk_params.bsize_at_least_8x8) {
*pt_cost = part_search_state->partition_cost[PARTITION_NONE] < INT_MAX
? part_search_state->partition_cost[PARTITION_NONE]
: 0;
}
// Adjust dist breakout threshold according to the partition size. const int64_t dist_breakout_thr =
cpi->sf.part_sf.partition_search_breakout_dist_thr >>
((2 * (MAX_SB_SIZE_LOG2 - 2)) -
(mi_size_wide_log2[bsize] + mi_size_high_log2[bsize])); constint rate_breakout_thr =
cpi->sf.part_sf.partition_search_breakout_rate_thr *
num_pels_log2_lookup[bsize]; // If all y, u, v transform blocks in this partition are skippable, // and the dist & rate are within the thresholds, the partition // search is terminated for current branch of the partition search // tree. The dist & rate thresholds are set to 0 at speed 0 to // disable the early termination at that speed. if (best_rdc->dist < dist_breakout_thr &&
best_rdc->rate < rate_breakout_thr) {
part_search_state->do_square_split = 0;
part_search_state->do_rectangular_split = 0;
}
}
// Early termination: using simple_motion_search features and the // rate, distortion, and rdcost of PARTITION_NONE, a DNN will make a // decision on early terminating at PARTITION_NONE. if (cpi->sf.part_sf.simple_motion_search_early_term_none && cm->show_frame &&
!frame_is_intra_only(cm) && bsize >= BLOCK_16X16 &&
av1_blk_has_rows_and_cols(&blk_params) && this_rdc->rdcost < INT64_MAX &&
this_rdc->rdcost >= 0 && this_rdc->rate < INT_MAX &&
this_rdc->rate >= 0 &&
(part_search_state->do_square_split ||
part_search_state->do_rectangular_split)) {
av1_simple_motion_search_early_term_none(cpi, x, sms_tree, this_rdc,
part_search_state);
}
}
// Decide early termination and rectangular partition pruning // based on PARTITION_NONE and PARTITION_SPLIT costs. staticvoid prune_partitions_after_split(
AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
PartitionSearchState *part_search_state, RD_STATS *best_rdc,
int64_t part_none_rd, int64_t part_split_rd) { const AV1_COMMON *const cm = &cpi->common;
PartitionBlkParams blk_params = part_search_state->part_blk_params; constint mi_row = blk_params.mi_row; constint mi_col = blk_params.mi_col; const BLOCK_SIZE bsize = blk_params.bsize;
assert(bsize < BLOCK_SIZES_ALL);
// Early termination: using the rd costs of PARTITION_NONE and subblocks // from PARTITION_SPLIT to determine an early breakout. if (cpi->sf.part_sf.ml_early_term_after_part_split_level &&
!frame_is_intra_only(cm) &&
!part_search_state->terminate_partition_search &&
part_search_state->do_rectangular_split &&
(part_search_state->partition_rect_allowed[HORZ] ||
part_search_state->partition_rect_allowed[VERT])) {
av1_ml_early_term_after_split(
cpi, x, sms_tree, best_rdc->rdcost, part_none_rd, part_split_rd,
part_search_state->split_rd, part_search_state);
}
// Use the rd costs of PARTITION_NONE and subblocks from PARTITION_SPLIT // to prune out rectangular partitions in some directions. if (!cpi->sf.part_sf.ml_early_term_after_part_split_level &&
cpi->sf.part_sf.ml_prune_partition && !frame_is_intra_only(cm) &&
(part_search_state->partition_rect_allowed[HORZ] ||
part_search_state->partition_rect_allowed[VERT]) &&
!(part_search_state->prune_rect_part[HORZ] ||
part_search_state->prune_rect_part[VERT]) &&
!part_search_state->terminate_partition_search) {
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm),
bsize);
av1_ml_prune_rect_partition(cpi, x, best_rdc->rdcost,
part_search_state->none_rd,
part_search_state->split_rd, part_search_state);
}
}
// Returns true if either of the left and top neighbor blocks is larger than // the current block; false otherwise. staticinlinebool is_neighbor_blk_larger_than_cur_blk(const MACROBLOCKD *xd,
BLOCK_SIZE bsize) { constint cur_blk_area = (block_size_high[bsize] * block_size_wide[bsize]); if (xd->left_available) { const BLOCK_SIZE left_bsize = xd->left_mbmi->bsize; if (block_size_high[left_bsize] * block_size_wide[left_bsize] >
cur_blk_area) returntrue;
}
if (xd->up_available) { const BLOCK_SIZE above_bsize = xd->above_mbmi->bsize; if (block_size_high[above_bsize] * block_size_wide[above_bsize] >
cur_blk_area) returntrue;
} returnfalse;
}
// Sets rectangular-partition prune flags from the prediction mode chosen by
// the NONE partition.
//
// xd:         used only for the neighbor block-size comparison.
// part_state: prune_rect_part[HORZ] / [VERT] may be set to 1; flags are never
//             cleared here.
// mode:       prediction mode picked for PARTITION_NONE.
// bsize:      block size of the current block.
static inline void prune_rect_part_using_none_pred_mode(
    const MACROBLOCKD *xd, PartitionSearchState *part_state,
    PREDICTION_MODE mode, BLOCK_SIZE bsize) {
  switch (mode) {
    case DC_PRED:
    case SMOOTH_PRED:
      // DC_PRED / SMOOTH_PRED indicate that the block has little variation.
      // Both HORZ and VERT are pruned, provided at least one of the left and
      // top neighbor blocks is larger than the current block.
      if (is_neighbor_blk_larger_than_cur_blk(xd, bsize)) {
        part_state->prune_rect_part[HORZ] = 1;
        part_state->prune_rect_part[VERT] = 1;
      }
      break;

    case D67_PRED:
    case V_PRED:
    case D113_PRED:
      // Modes close to 90 degrees imply a dominant vertical pattern, so a
      // vertical rectangular partition is the likely choice: prune HORZ.
      part_state->prune_rect_part[HORZ] = 1;
      break;

    case D157_PRED:
    case H_PRED:
    case D203_PRED:
      // Modes close to 180 degrees imply a dominant horizontal pattern, so a
      // horizontal rectangular partition is the likely choice: prune VERT.
      part_state->prune_rect_part[VERT] = 1;
      break;

    default:
      // Any other mode leaves the prune flags untouched.
      break;
  }
}
// Check if partition split is allowed. if (part_search_state->terminate_partition_search ||
!part_search_state->do_square_split) return;
for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { if (pc_tree->split[i] == NULL)
pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); if (!pc_tree->split[i])
aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PC_TREE");
pc_tree->split[i]->index = i;
}
// Initialization of this partition RD stats.
av1_init_rd_stats(&sum_rdc);
sum_rdc.rate = part_search_state->partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
// Set split ctx as ready for use. if (idx <= 1 && (bsize <= BLOCK_8X8 ||
pc_tree->split[idx]->partitioning == PARTITION_NONE)) { const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none->mic; const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; // Neither palette mode nor cfl predicted. if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { if (mbmi->uv_mode != UV_CFL_PRED)
part_search_state->is_split_ctx_is_ready[idx] = 1;
}
}
} #if CONFIG_COLLECT_PARTITION_STATS if (part_timing_stats->timer_is_on) {
end_partition_block_timer(part_timing_stats, PARTITION_SPLIT,
sum_rdc.rdcost);
} #endif constint reached_last_index = (idx == SUB_PARTITIONS_SPLIT);
// Calculate the total cost and update the best partition.
*part_split_rd = sum_rdc.rdcost; if (reached_last_index && sum_rdc.rdcost < best_rdc->rdcost) {
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (sum_rdc.rdcost < best_rdc->rdcost) {
*best_rdc = sum_rdc;
part_search_state->found_best_partition = true;
pc_tree->partitioning = PARTITION_SPLIT;
}
} elseif (cpi->sf.part_sf.less_rectangular_check_level > 0) { // Skip rectangular partition test when partition type none gives better // rd than partition type split. if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) { constint partition_none_valid = part_search_state->none_rd > 0; constint partition_none_better =
part_search_state->none_rd < sum_rdc.rdcost;
part_search_state->do_rectangular_split &=
!(partition_none_valid && partition_none_better);
}
} // Restore the context for the following cases: // 1) Current block size not more than maximum partition size as dry run // encode happens for these cases // 2) Current block size same as superblock size as the final encode // happens for this case if (bsize <= x->sb_enc.max_partition_size || bsize == cm->seq_params->sb_size)
av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
}
// The max number of nodes in the partition tree.
// The number of leaf nodes is (128x128) / (4x4) = 1024.
// The number of all possible parent nodes is 1 + 2 + 4 + ... + 512 = 1023.
// Leaves plus parents total 2047, which fits within NUM_NODES.
#define NUM_NODES 2048
// Save rdmult before it might be changed, so it can be restored later. constint orig_rdmult = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
(void)orig_rdmult;
// The ML model needs to provide the whole decision tree for the superblock. staticbool ml_partition_search_whole_tree(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data,
TokenExtra **tp,
SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row, int mi_col, const BLOCK_SIZE bsize) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
ExtPartController *const ext_part_controller = &cpi->ext_part_controller; struct aom_internal_error_info *error_info = x->e_mbd.error_info;
aom_partition_features_t features;
prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
&features);
features.mi_row = mi_row;
features.mi_col = mi_col;
features.frame_width = cpi->frame_info.frame_width;
features.frame_height = cpi->frame_info.frame_height;
features.block_size = bsize;
av1_ext_part_send_features(ext_part_controller, &features);
// rd mode search (dry run) for a valid partition decision from the ml model.
aom_partition_decision_t partition_decision; do { constbool valid_decision = av1_ext_part_get_partition_decision(
ext_part_controller, &partition_decision); if (!valid_decision) returnfalse;
// First, let's take the easy approach. // We require that the ml model has to provide partition decisions for the // whole superblock.
td->pc_root = av1_alloc_pc_tree_node(bsize); if (!td->pc_root)
aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PC_TREE");
build_pc_tree_from_part_decision(&partition_decision, bsize, td->pc_root,
error_info);
staticinlinebool should_do_dry_run_encode_for_current_block(
BLOCK_SIZE sb_size, BLOCK_SIZE max_partition_size, int curr_block_index,
BLOCK_SIZE bsize) { if (bsize > max_partition_size) returnfalse;
// Enable the reconstruction with dry-run for the 4th sub-block only if its // parent block's reconstruction with dry-run is skipped. If // max_partition_size is the same as immediate split of superblock, then avoid // reconstruction of the 4th sub-block, as this data is not consumed. if (curr_block_index != 3) returntrue;
staticvoid log_sub_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, double *var_min, double *var_max) { // This functions returns a the minimum and maximum log variances for 4x4 // sub blocks in the current block.
/*!\brief AV1 block partition search (full search).
 *
 * \ingroup partition_search
 * \callgraph
 * Searches for the best partition pattern for a block based on the
 * rate-distortion cost, and returns a bool value to indicate whether a valid
 * partition pattern is found. The partition can recursively go down to the
 * smallest block size.
 *
 * \param[in]    cpi                Top-level encoder structure
 * \param[in]    td                 Pointer to thread data
 * \param[in]    tile_data          Pointer to struct holding adaptive
 *                                  data/contexts/models for the tile during
 *                                  encoding
 * \param[in]    tp                 Pointer to the starting token
 * \param[in]    mi_row             Row coordinate of the block in a step size
 *                                  of MI_SIZE
 * \param[in]    mi_col             Column coordinate of the block in a step
 *                                  size of MI_SIZE
 * \param[in]    bsize              Current block size
 * \param[out]   rd_cost            Pointer to the final rd cost of the block
 * \param[in]    best_rdc           Upper bound of rd cost of a valid partition
 * \param[in]    pc_tree            Pointer to the PC_TREE node storing the
 *                                  picked partitions and mode info for the
 *                                  current block
 * \param[in]    sms_tree           Pointer to struct holding simple motion
 *                                  search data for the current block
 * \param[out]   none_rd            Pointer to the rd cost in the case of not
 *                                  splitting the current block
 * \param[in]    multi_pass_mode    SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS
 * \param[in]    rect_part_win_info Pointer to struct storing whether horz/vert
 *                                  partition outperforms previously tested
 *                                  partitions
 *
 * \return A bool value is returned indicating if a valid partition is found.
 * The pc_tree struct is modified to store the picked partition and modes.
 * The rd_cost struct is also updated with the RD stats corresponding to the
 * best partition found.
*/ bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost,
RD_STATS best_rdc, PC_TREE *pc_tree,
SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd,
SB_MULTI_PASS_MODE multi_pass_mode,
RD_RECT_PART_WIN_INFO *rect_part_win_info) { const AV1_COMMON *const cm = &cpi->common; constint num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; const TokenExtra *const tp_orig = *tp;
PartitionSearchState part_search_state;
// Initialization of state variables used in partition search.
init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
bsize);
PartitionBlkParams blk_params = part_search_state.part_blk_params;
set_sms_tree_partitioning(sms_tree, PARTITION_NONE); if (best_rdc.rdcost < 0) {
av1_invalid_rd_stats(rd_cost); return part_search_state.found_best_partition;
} if (bsize == cm->seq_params->sb_size) x->must_find_valid_partition = 0;
// Override skipping rectangular partition operations for edge blocks. if (none_rd) *none_rd = 0;
(void)*tp_orig;
#if CONFIG_COLLECT_PARTITION_STATS // Stats at the current quad tree
PartitionTimingStats *part_timing_stats =
&part_search_state.part_timing_stats; // Stats aggregated at frame level
FramePartitionTimingStats *fr_part_timing_stats = &cpi->partition_stats; #endif// CONFIG_COLLECT_PARTITION_STATS
// Override partition costs at the edges of the frame in the same // way as in read_partition (see decodeframe.c). if (!av1_blk_has_rows_and_cols(&blk_params))
set_partition_cost_for_edge_blk(cm, &part_search_state);
// Disable rectangular partitions for inner blocks when the current block is // forced to only use square partitions. if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) {
part_search_state.partition_rect_allowed[HORZ] &= !blk_params.has_rows;
part_search_state.partition_rect_allowed[VERT] &= !blk_params.has_cols;
}
#ifndef NDEBUG // Nothing should rely on the default value of this array (which is just // leftover from encoding the previous block. Setting it to fixed pattern // when debugging. // bit 0, 1, 2 are blk_skip of each plane // bit 4, 5, 6 are initialization checking of each plane
memset(x->txfm_search_info.blk_skip, 0x77, sizeof(x->txfm_search_info.blk_skip)); #endif// NDEBUG
// Save rdmult before it might be changed, so it can be restored later. constint orig_rdmult = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
// Apply simple motion search for the entire super block with fixed block // size, e.g., 16x16, to collect features and write to files for the // external ML model. // TODO(chengchen): reduce motion search. This function is similar to // av1_get_max_min_partition_features(). if (COLLECT_MOTION_SEARCH_FEATURE_SB && !frame_is_intra_only(cm) &&
bsize == cm->seq_params->sb_size) {
av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
bsize, /*features=*/NULL);
collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, /*features=*/NULL);
}
// Update rd cost of the bound using the current multiplier.
av1_rd_cost_update(x->rdmult, &best_rdc);
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, av1_prune_partitions_time); #endif // Pruning: before searching any partition type, using source and simple // motion search results to prune out unlikely partitions.
av1_prune_partitions_before_search(cpi, x, sms_tree, &part_search_state);
// Pruning: eliminating partition types leading to coding block sizes outside // the min and max bsize limitations set from the encoder.
av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state); #if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_prune_partitions_time); #endif
// Partition search
BEGIN_PARTITION_SEARCH: // If a valid partition is required, usually when the first round cannot find // a valid one under the cost limit after pruning, reset the limitations on // partition types and intra cnn output. if (x->must_find_valid_partition) {
reset_part_limitations(cpi, &part_search_state);
av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state); // Invalidate intra cnn output for key frames. if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
part_search_state.intra_part_info->quad_tree_idx = 0;
part_search_state.intra_part_info->cnn_output_valid = 0;
}
} // Partition block source pixel variance. unsignedint pb_source_variance = UINT_MAX;
// Further pruning or in some cases reverse pruning when allintra is set. // This code helps visual and in some cases metrics quality where the // current block comprises at least one very low variance sub-block and at // least one where the variance is much higher. // // The idea is that in such cases there is danger of ringing and other // visual artifacts from a high variance feature such as an edge into a // very low variance region. // // The approach taken is to force break down / split to a smaller block // size to try and separate out the low variance and well predicted blocks // from the more complex ones and to prevent propagation of ringing over a // large region. if (bsize_at_least_16x16 && (var_min < 0.272) &&
((var_max - var_min) > 3.0)) {
part_search_state.partition_none_allowed = 0;
part_search_state.terminate_partition_search = 0;
part_search_state.do_square_split = 1;
} elseif (prune_rect_part_using_4x4_var_deviation &&
(var_max - var_min < 3.0)) { // Prune rectangular partitions if the variance deviation of 4x4 // sub-blocks within the block is less than a threshold (derived // empirically).
part_search_state.do_rectangular_split = 0;
}
}
}
// Do not evaluate non-square partitions if NONE partition did not choose a // newmv mode and is skippable. if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 2) &&
(pc_tree->none != NULL)) { if (x->qindex <= 200 && is_inter_mode(pc_tree->none->mic.mode) &&
!have_newmv_in_inter_mode(pc_tree->none->mic.mode) &&
pc_tree->none->skippable && !x->must_find_valid_partition &&
bsize >= BLOCK_16X16)
part_search_state.do_rectangular_split = 0;
}
if (bsize == cm->seq_params->sb_size &&
!part_search_state.found_best_partition) { // Did not find a valid partition, go back and search again, with less // constraint on which partition types to search.
x->must_find_valid_partition = 1; #if CONFIG_COLLECT_PARTITION_STATS
fr_part_timing_stats->partition_redo += 1; #endif// CONFIG_COLLECT_PARTITION_STATS goto BEGIN_PARTITION_SEARCH;
}
// Store the final rd cost
*rd_cost = best_rdc;
// Also record the best partition in simple motion data tree because it is // necessary for the related speed features.
set_sms_tree_partitioning(sms_tree, pc_tree->partitioning);
// If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each // prediction block.
print_partition_timing_stats_with_rdcost(
part_timing_stats, mi_row, mi_col, bsize,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
cm->current_frame.frame_number, &best_rdc, "part_timing.csv"); constbool print_timing_stats = false; if (print_timing_stats) {
print_partition_timing_stats(part_timing_stats, cm->show_frame,
frame_is_intra_only(cm), bsize, "part_timing_data.csv");
} // If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats for // the whole clip. So we need to pass the information upstream to the encoder.
accumulate_partition_timing_stats(fr_part_timing_stats, part_timing_stats,
bsize); #endif// CONFIG_COLLECT_PARTITION_STATS
// Reset the PC_TREE deallocation flag. int pc_tree_dealloc = 0;
// Write partition tree to file. Not used by default. if (COLLECT_MOTION_SEARCH_FEATURE_SB) {
write_partition_tree(cpi, pc_tree, bsize, mi_row, mi_col);
++cpi->sb_counter;
}
// If the tree still exists (non-superblock), dealloc most nodes, only keep // nodes for the best partition and PARTITION_NONE. if (pc_tree_dealloc == 0)
av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1,
cpi->sf.part_sf.partition_search_type);
for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); if (!pc_tree->split[i])
aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, "Failed to allocate PC_TREE");
pc_tree->split[i]->index = i;
}
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); for (int i = 0;
i < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc.rdcost; ++i) { constint x_idx = (i & 1) * hbs; constint y_idx = (i >> 1) * hbs;
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.169Bemerkung:
(vorverarbeitet am 2026-04-25)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.