/* * Copyright (c) 2023, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/ #include <assert.h> #include <float.h> #include <string.h>
// This function is to extract red/green/blue channels, and calculate intensity // = (r+g+b)/3. Note that it only handles 8bits case now. // TODO(linzhen): add high bitdepth support. staticvoid get_color_intensity(const YV12_BUFFER_CONFIG *src, int subsampling_x, int subsampling_y, double *cr, double *cg, double *cb, double *intensity) { const uint8_t *y = src->buffers[0]; const uint8_t *u = src->buffers[1]; const uint8_t *v = src->buffers[2];
staticinlinedouble convolve_map(constdouble *filter, constdouble *map, constint size) { double result = 0; for (int i = 0; i < size; ++i) {
result += filter[i] * map[i]; // symmetric filter is used
} return result;
}
// This function is to decimate the map by half, and apply Gaussian filter on // top of the downsampled map. staticinlinevoid decimate_map(constdouble *map, int height, int width, int stride, double *downsampled_map) { constint new_width = width / 2; constint window_size = 5; constdouble gaussian_filter[25] = {
1. / 256, 1.0 / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16,
3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32,
3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256,
1. / 64, 3. / 128, 1. / 64, 1. / 256
};
double map_region[25]; for (int y = 0; y < height - 1; y += 2) { for (int x = 0; x < width - 1; x += 2) { int i = 0; for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) { for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) { int yvalue = clamp(yy, 0, height - 1); int xvalue = clamp(xx, 0, width - 1);
map_region[i++] = map[yvalue * stride + xvalue];
}
}
downsampled_map[(y / 2) * new_width + (x / 2)] =
convolve_map(gaussian_filter, map_region, window_size * window_size);
}
}
}
// This function is to upscale the map from in_level size to out_level size. // Note that the map at "level-1" will upscale the map at "level" by x2. staticinlineint upscale_map(constdouble *input, int in_level, int out_level, int height[9], int width[9], double *output) { for (int level = in_level; level > out_level; level--) { constint cur_width = width[level]; constint cur_height = height[level]; constint cur_stride = width[level];
for (int i = 0; i < h_upscale; ++i) { for (int j = 0; j < w_upscale; ++j) { constint ii = clamp((i >> 1), 0, cur_height - 1); constint jj = clamp((j >> 1), 0, cur_width - 1);
upscale[j + i * s_upscale] = (double)original[jj + ii * cur_stride];
}
}
memcpy(output, upscale, h_upscale * w_upscale * sizeof(double));
aom_free(upscale);
}
return 1;
}
// This function calculates the differences between a fine scale c and a // coarser scale s yielding the feature maps. c \in {2, 3, 4}, and s = c + // delta, where delta \in {3, 4}. staticint center_surround_diff(constdouble *input[9], int height[9], int width[9], saliency_feature_map *output[6]) { int j = 0; for (int k = 2; k < 5; ++k) { int cur_height = height[k]; int cur_width = width[k];
if (upscale_map(input[k + 3], k + 3, k, height, width, output[j]->buf) ==
0) { return 0;
}
for (int r = 0; r < cur_height; ++r) { for (int c = 0; c < cur_width; ++c) {
output[j]->buf[r * cur_width + c] =
fabs((double)(input[k][r * cur_width + c] -
output[j]->buf[r * cur_width + c]));
}
}
if (upscale_map(input[k + 4], k + 4, k, height, width,
output[j + 1]->buf) == 0) { return 0;
}
for (int r = 0; r < cur_height; ++r) { for (int c = 0; c < cur_width; ++c) {
output[j + 1]->buf[r * cur_width + c] =
fabs(input[k][r * cur_width + c] -
output[j + 1]->buf[r * cur_width + c]);
}
}
j += 2;
} return 1;
}
// For color channels, the differences is calculated based on "color // double-opponency". For example, the RG feature map is constructed between a // fine scale c of R-G component and a coarser scale s of G-R component. staticint center_surround_diff_rgb(constdouble *input_1[9], constdouble *input_2[9], int height[9], int width[9],
saliency_feature_map *output[6]) { int j = 0; for (int k = 2; k < 5; ++k) { int cur_height = height[k]; int cur_width = width[k];
if (upscale_map(input_2[k + 3], k + 3, k, height, width, output[j]->buf) ==
0) { return 0;
}
for (int r = 0; r < cur_height; ++r) { for (int c = 0; c < cur_width; ++c) {
output[j]->buf[r * cur_width + c] =
fabs((double)(input_1[k][r * cur_width + c] -
output[j]->buf[r * cur_width + c]));
}
}
if (upscale_map(input_2[k + 4], k + 4, k, height, width,
output[j + 1]->buf) == 0) { return 0;
}
for (int r = 0; r < cur_height; ++r) { for (int c = 0; c < cur_width; ++c) {
output[j + 1]->buf[r * cur_width + c] =
fabs(input_1[k][r * cur_width + c] -
output[j + 1]->buf[r * cur_width + c]);
}
}
j += 2;
} return 1;
}
// This function is to generate Gaussian pyramid images with indexes from 0 to // 8, and construct the feature maps from calculating the center-surround // differences. staticint gaussian_pyramid(constdouble *src, int width[9], int height[9],
saliency_feature_map *dst[6]) { double *gaussian_map[9]; // scale = 9
gaussian_map[0] =
(double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0])); if (!gaussian_map[0]) { return 0;
}
for (int y = 0; y < input->height - stepsize; y += stepsize) { for (int x = 0; x < input->width - stepsize; x += stepsize) { for (int i = 0; i < stepsize; ++i) { for (int j = 0; j < stepsize; ++j) {
local_map.buf[i * stepsize + j] =
input->buf[(y + i) * input->width + x + j];
}
}
// Linear normalization the values in the map to [0,1]. staticvoid minmax_normalize(saliency_feature_map *input) { double max_value, min_value;
find_min_max(input, &max_value, &min_value);
// This function is to promote meaningful “activation spots” in the map and // ignores homogeneous areas. staticint nomalization_operator(saliency_feature_map *input, int stepsize) {
minmax_normalize(input); double lmaxmean = average_local_max(input, stepsize); if (lmaxmean < 0) { return 0;
} double normCoeff = (1 - lmaxmean) * (1 - lmaxmean);
for (int i = 0; i < input->height; ++i) { for (int j = 0; j < input->width; ++j) {
input->buf[i * input->width + j] *= normCoeff;
}
}
return 1;
}
// Normalize the values in feature maps to [0,1], and then upscale all maps to // the original frame size. staticint normalize_fm(saliency_feature_map *input[6], int width[9], int height[9], int num_fm,
saliency_feature_map *output[6]) { // Feature maps (FM) are generated by function "center_surround_diff()". The // difference is between a fine scale c and a coarser scale s, where c \in {2, // 3, 4}, and s = c + delta, where delta \in {3, 4}, and the FM size is scale // c. Specifically, i=0: c=2 and s=5, i=1: c=2 and s=6, i=2: c=3 and s=6, i=3: // c=3 and s=7, i=4: c=4 and s=7, i=5: c=4 and s=8. for (int i = 0; i < num_fm; ++i) { if (nomalization_operator(input[i], 8) == 0) { return 0;
}
// Upscale FM to original frame size if (upscale_map(input[i]->buf, (i / 2) + 2, 0, height, width,
output[i]->buf) == 0) { return 0;
}
} return 1;
}
// Combine feature maps with the same category (intensity, color, or // orientation) into one conspicuity map. staticint normalized_map(saliency_feature_map *input[6], int width[9], int height[9], saliency_feature_map *output) { int num_fm = 6;
saliency_feature_map *n_input[6]; for (int i = 0; i < 6; ++i) {
n_input[i] = (saliency_feature_map *)aom_malloc(sizeof(*n_input[i])); if (!n_input[i]) { return 0;
}
n_input[i]->buf =
(double *)aom_malloc(width[0] * height[0] * sizeof(*n_input[i]->buf)); if (!n_input[i]->buf) {
aom_free(n_input[i]); return 0;
}
n_input[i]->height = height[0];
n_input[i]->width = width[0];
}
if (normalize_fm(input, width, height, num_fm, n_input) == 0) { for (int i = 0; i < num_fm; ++i) {
aom_free(n_input[i]->buf);
aom_free(n_input[i]);
} return 0;
}
// Add up all normalized feature maps with the same category into one map. for (int i = 0; i < num_fm; ++i) { for (int r = 0; r < height[0]; ++r) { for (int c = 0; c < width[0]; ++c) {
output->buf[r * width[0] + c] += n_input[i]->buf[r * width[0] + c];
}
}
}
for (int i = 0; i < num_fm; ++i) {
aom_free(n_input[i]->buf);
aom_free(n_input[i]);
}
nomalization_operator(output, 8); return 1;
}
staticint normalized_map_rgb(saliency_feature_map *rg_map[6],
saliency_feature_map *by_map[6], int width[9], int height[9], saliency_feature_map *output) {
saliency_feature_map *color_cm[2]; // 0: color_cm_rg, 1: color_cm_by for (int i = 0; i < 2; ++i) {
color_cm[i] = aom_malloc(sizeof(*color_cm[i])); if (!color_cm[i]) { return 0;
}
color_cm[i]->buf =
(double *)aom_malloc(width[0] * height[0] * sizeof(*color_cm[i]->buf)); if (!color_cm[i]->buf) { for (int l = 0; l < i; ++l) {
aom_free(color_cm[l]->buf);
}
aom_free(color_cm[i]); return 0;
}
if (normalized_map(rg_map, width, height, color_cm[0]) == 0 ||
normalized_map(by_map, width, height, color_cm[1]) == 0) { for (int i = 0; i < 2; ++i) {
aom_free(color_cm[i]->buf);
aom_free(color_cm[i]);
} return 0;
}
for (int r = 0; r < height[0]; ++r) { for (int c = 0; c < width[0]; ++c) {
output->buf[r * width[0] + c] = color_cm[0]->buf[r * width[0] + c] +
color_cm[1]->buf[r * width[0] + c];
}
}
for (int i = 0; i < 2; ++i) {
aom_free(color_cm[i]->buf);
aom_free(color_cm[i]);
}
nomalization_operator(output, 8); return 1;
}
staticint normalized_map_orientation(saliency_feature_map *orientation_map[24], int width[9], int height[9],
saliency_feature_map *output) { int num_fms_per_angle = 6;
saliency_feature_map *ofm[4][6]; for (int i = 0; i < num_fms_per_angle; ++i) { for (int j = 0; j < 4; ++j) {
ofm[j][i] = orientation_map[j * num_fms_per_angle + i];
}
}
// extract conspicuity map for each angle
saliency_feature_map *nofm = aom_malloc(sizeof(*nofm)); if (!nofm) { return 0;
}
nofm->buf = (double *)aom_malloc(width[0] * height[0] * sizeof(*nofm->buf)); if (!nofm->buf) {
aom_free(nofm); return 0;
}
nofm->height = height[0];
nofm->width = width[0];
for (int i = 0; i < 4; ++i) {
memset(nofm->buf, 0, width[0] * height[0] * sizeof(*nofm->buf)); if (normalized_map(ofm[i], width, height, nofm) == 0) {
aom_free(nofm->buf);
aom_free(nofm); return 0;
}
for (int r = 0; r < height[0]; ++r) { for (int c = 0; c < width[0]; ++c) {
output->buf[r * width[0] + c] += nofm->buf[r * width[0] + c];
}
}
}
aom_free(nofm->buf);
aom_free(nofm);
nomalization_operator(output, 8); return 1;
}
// Set pixel level saliency mask based on Itti-Koch algorithm int av1_set_saliency_map(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int frm_width = cm->width; int frm_height = cm->height;
// Extract red / green / blue channels and intensity component
get_color_intensity(cpi->source, cm->seq_params->subsampling_x,
cm->seq_params->subsampling_y, cr, cg, cb, intensity);
// Feature Map Extraction // intensity map
saliency_feature_map *i_map[6]; for (int i = 0; i < 6; ++i) { int cur_height = pyr_height[(i / 2) + 2]; int cur_width = pyr_width[(i / 2) + 2];
i_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*i_map[i])); if (!i_map[i]) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < i; ++l) {
aom_free(i_map[l]);
} return 0;
}
i_map[i]->buf =
(double *)aom_malloc(cur_height * cur_width * sizeof(*i_map[i]->buf)); if (!i_map[i]->buf) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < i; ++l) {
aom_free(i_map[l]->buf);
aom_free(i_map[l]);
} return 0;
}
i_map[i]->height = cur_height;
i_map[i]->width = cur_width;
}
if (get_feature_map_intensity(intensity, pyr_width, pyr_height, i_map) == 0) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(i_map[l]);
} return 0;
}
// RGB map
saliency_feature_map *rg_map[6], *by_map[6]; for (int i = 0; i < 6; ++i) { int cur_height = pyr_height[(i / 2) + 2]; int cur_width = pyr_width[(i / 2) + 2];
rg_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*rg_map[i]));
by_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*by_map[i])); if (!rg_map[i] || !by_map[i]) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
} return 0;
}
rg_map[i]->buf =
(double *)aom_malloc(cur_height * cur_width * sizeof(*rg_map[i]->buf));
by_map[i]->buf =
(double *)aom_malloc(cur_height * cur_width * sizeof(*by_map[i]->buf)); if (!by_map[i]->buf || !rg_map[i]->buf) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(i_map[l]);
} for (int l = 0; l < i; ++l) {
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(rg_map[l]);
aom_free(by_map[l]);
} return 0;
}
rg_map[i]->height = cur_height;
rg_map[i]->width = cur_width;
by_map[i]->height = cur_height;
by_map[i]->width = cur_width;
}
if (get_feature_map_rgb(cr, cg, cb, pyr_width, pyr_height, rg_map, by_map) ==
0) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
} return 0;
}
// Orientation map
saliency_feature_map *orientation_map[24]; for (int i = 0; i < 24; ++i) { int cur_height = pyr_height[((i % 6) / 2) + 2]; int cur_width = pyr_width[((i % 6) / 2) + 2];
orientation_map[i] =
(saliency_feature_map *)aom_malloc(sizeof(*orientation_map[i])); if (!orientation_map[i]) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
} for (int h = 0; h < i; ++h) {
aom_free(orientation_map[h]);
} return 0;
}
orientation_map[i]->buf = (double *)aom_malloc(
cur_height * cur_width * sizeof(*orientation_map[i]->buf)); if (!orientation_map[i]->buf) {
aom_free(cr);
aom_free(cg);
aom_free(cb);
aom_free(intensity); for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
}
for (int h = 0; h < i; ++h) {
aom_free(orientation_map[h]->buf);
aom_free(orientation_map[h]->buf);
aom_free(orientation_map[h]);
aom_free(orientation_map[h]);
} return 0;
}
for (int i = 0; i < 3; ++i) {
normalized_maps[i] = aom_malloc(sizeof(*normalized_maps[i])); if (!normalized_maps[i]) { for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
}
for (int h = 0; h < 24; ++h) {
aom_free(orientation_map[h]->buf);
aom_free(orientation_map[h]);
}
for (int l = 0; l < i; ++l) {
aom_free(normalized_maps[l]);
} return 0;
}
normalized_maps[i]->buf = (double *)aom_malloc(
frm_width * frm_height * sizeof(*normalized_maps[i]->buf)); if (!normalized_maps[i]->buf) { for (int l = 0; l < 6; ++l) {
aom_free(i_map[l]->buf);
aom_free(rg_map[l]->buf);
aom_free(by_map[l]->buf);
aom_free(i_map[l]);
aom_free(rg_map[l]);
aom_free(by_map[l]);
} for (int h = 0; h < 24; ++h) {
aom_free(orientation_map[h]->buf);
aom_free(orientation_map[h]);
} for (int l = 0; l < i; ++l) {
aom_free(normalized_maps[l]->buf);
aom_free(normalized_maps[l]);
} return 0;
}
normalized_maps[i]->width = frm_width;
normalized_maps[i]->height = frm_height;
memset(normalized_maps[i]->buf, 0,
frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
}
for (int r = 0; r < frm_height; ++r) { for (int c = 0; c < frm_width; ++c) {
combined_saliency_map->buf[r * frm_width + c] =
(w_intensity * normalized_maps[0]->buf[r * frm_width + c] +
w_color * normalized_maps[1]->buf[r * frm_width + c] +
w_orient * normalized_maps[2]->buf[r * frm_width + c]);
}
}
for (int r = 0; r < frm_height; ++r) { for (int c = 0; c < frm_width; ++c) { int index = r * frm_width + c;
cpi->saliency_map[index] =
(uint8_t)(combined_saliency_map->buf[index] * 255);
}
}
for (int i = 0; i < 3; ++i) {
aom_free(normalized_maps[i]->buf);
aom_free(normalized_maps[i]);
}
double log_sum = 0.0; double sum = 0.0; int block_count = 0;
// Calculate the average superblock sm_scaling_factor for a frame, to be used // for clamping later. for (int row = 0; row < num_sb_rows; ++row) { for (int col = 0; col < num_sb_cols; ++col) { constint index = row * num_sb_cols + col; constdouble saliency = sb_saliency_map->buf[index];
cpi->sm_scaling_factor[index] = 1 - saliency;
sum += cpi->sm_scaling_factor[index];
block_count++;
}
}
assert(block_count > 0);
sum /= block_count;
// Calculate the geometric mean of superblock sm_scaling_factor for a frame, // to be used for normalization. for (int row = 0; row < num_sb_rows; ++row) { for (int col = 0; col < num_sb_cols; ++col) { constint index = row * num_sb_cols + col;
log_sum += log(fmax(cpi->sm_scaling_factor[index], 0.001));
cpi->sm_scaling_factor[index] =
fmax(cpi->sm_scaling_factor[index], 0.8 * sum);
}
}
log_sum = exp(log_sum / block_count);
// Normalize the sm_scaling_factor by geometric mean. for (int row = 0; row < num_sb_rows; ++row) { for (int col = 0; col < num_sb_cols; ++col) { constint index = row * num_sb_cols + col;
assert(log_sum > 0);
cpi->sm_scaling_factor[index] /= log_sum;
// Modulate the sm_scaling_factor by frame basis motion factor
cpi->sm_scaling_factor[index] =
cpi->sm_scaling_factor[index] * motion_ratio;
}
}
// av1_setup_motion_ratio() is only enabled when CONFIG_REALTIME_ONLY is 0, // because the computations need to access the first pass stats which are // only available when CONFIG_REALTIME_ONLY is equal to 0. #if !CONFIG_REALTIME_ONLY // Set motion_ratio that reflects the motion quantities between two consecutive // frames. Motion_ratio will be used to set up saliency_map based rdmult scaling // factor, i.e., the less the motion quantities are, the more bits will be spent // on this frame, and vice versa. double av1_setup_motion_ratio(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common; int frames_since_key =
cm->current_frame.display_order_hint - cpi->rc.frames_since_key; const FIRSTPASS_STATS *cur_stats = av1_firstpass_info_peek(
&cpi->ppi->twopass.firstpass_info, frames_since_key);
assert(cur_stats != NULL);
assert(cpi->ppi->twopass.firstpass_info.total_stats.count > 0);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.