Quelle saliency_map.c Sprache: C

/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <float.h>
#include <string.h>

#include "av1/encoder/encoder.h"
#include "av1/encoder/encoder_utils.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/saliency_map.h"

// The Gabor filter is generated by setting the parameters as:
// ksize = 9
// sigma = 1
// theta = y*np.pi/4, where y /in {0, 1, 2, 3}, i.e., 0, 45, 90, 135 degree
// lambda1 = 1
// gamma=0.8
// phi =0
static const double kGaborFilter[4][9][9] = {  // [angle: 0, 45, 90, 135
                                               // degree][ksize][ksize]
  { { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
      3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 },
    { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
      3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
    { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
      1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
    { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
      4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
    { 3.3546262e-04, 1.1108996e-02, 1.3533528e-01, 6.0653067e-01, 1.0000000e+00,
      6.0653067e-01, 1.3533528e-01, 1.1108996e-02, 3.3546262e-04 },
    { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
      4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
    { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
      1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
    { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
      3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
    { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
      3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 } },

  { { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
      6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
      3.5712848e-05 },
    { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
      1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
      -8.1631159e-05 },
    { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
      -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
      -9.9486928e-04 },
    { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
      -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
      1.3962291e-03 },
    { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
      1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
      6.6981313e-04 },
    { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
      -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
      -4.4602581e-04 },
    { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
      -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
      3.0079011e-06 },
    { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
      1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
      3.8760313e-06 },
    { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
      6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
      -6.2165498e-08 } },

  { { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
      2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 },
    { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
      8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
    { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
      9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
    { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
      4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
    { 5.9760227e-03, 5.6134764e-02, 2.7803731e-01, 7.2614902e-01, 1.0000000e+00,
      7.2614902e-01, 2.7803731e-01, 5.6134764e-02, 5.9760227e-03 },
    { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
      4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
    { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
      9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
    { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
      8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
    { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
      2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 } },

  { { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
      6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
      -6.2165498e-08 },
    { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
      1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
      3.8760313e-06 },
    { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
      -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
      3.0079011e-06 },
    { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
      -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
      -4.4602581e-04 },
    { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
      1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
      6.6981313e-04 },
    { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
      -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
      1.3962291e-03 },
    { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
      -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
      -9.9486928e-04 },
    { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
      1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
      -8.1631159e-05 },
    { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
      6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
      3.5712848e-05 } }
};

// This function is to extract red/green/blue channels, and calculate intensity
// = (r+g+b)/3. Note that it only handles 8bits case now.
// TODO(linzhen): add high bitdepth support.
static void get_color_intensity(const YV12_BUFFER_CONFIG *src,
                                int subsampling_x, int subsampling_y,
                                double *cr, double *cg, double *cb,
                                double *intensity) {
  const uint8_t *y = src->buffers[0];
  const uint8_t *u = src->buffers[1];
  const uint8_t *v = src->buffers[2];

  const int y_height = src->crop_heights[0];
  const int y_width = src->crop_widths[0];
  const int y_stride = src->strides[0];
  const int c_stride = src->strides[1];

  for (int i = 0; i < y_height; ++i) {
    for (int j = 0; j < y_width; ++j) {
      cr[i * y_width + j] =
          fclamp((double)y[i * y_stride + j] +
                     1.370 * (double)(v[(i >> subsampling_y) * c_stride +
                                        (j >> subsampling_x)] -
                                      128),
                 0, 255);
      cg[i * y_width + j] =
          fclamp((double)y[i * y_stride + j] -
                     0.698 * (double)(u[(i >> subsampling_y) * c_stride +
                                        (j >> subsampling_x)] -
                                      128) -
                     0.337 * (double)(v[(i >> subsampling_y) * c_stride +
                                        (j >> subsampling_x)] -
                                      128),
                 0, 255);
      cb[i * y_width + j] =
          fclamp((double)y[i * y_stride + j] +
                     1.732 * (double)(u[(i >> subsampling_y) * c_stride +
                                        (j >> subsampling_x)] -
                                      128),
                 0, 255);

      intensity[i * y_width + j] =
          (cr[i * y_width + j] + cg[i * y_width + j] + cb[i * y_width + j]) /
          3.0;
      assert(intensity[i * y_width + j] >= 0 &&
             intensity[i * y_width + j] <= 255);

      intensity[i * y_width + j] /= 256;
      cr[i * y_width + j] /= 256;
      cg[i * y_width + j] /= 256;
      cb[i * y_width + j] /= 256;
    }
  }
}

static inline double convolve_map(const double *filter, const double *map,
                                  const int size) {
  double result = 0;
  for (int i = 0; i < size; ++i) {
    result += filter[i] * map[i];  // symmetric filter is used
  }
  return result;
}

// This function is to decimate the map by half, and apply Gaussian filter on
// top of the downsampled map.
static inline void decimate_map(const double *map, int height, int width,
                                int stride, double *downsampled_map) {
  const int new_width = width / 2;
  const int window_size = 5;
  const double gaussian_filter[25] = {
    1. / 256, 1.0 / 64, 3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
    3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
    3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
    1. / 64,  3. / 128, 1. / 64,  1. / 256
  };

  double map_region[25];
  for (int y = 0; y < height - 1; y += 2) {
    for (int x = 0; x < width - 1; x += 2) {
      int i = 0;
      for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
        for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
          int yvalue = clamp(yy, 0, height - 1);
          int xvalue = clamp(xx, 0, width - 1);
          map_region[i++] = map[yvalue * stride + xvalue];
        }
      }
      downsampled_map[(y / 2) * new_width + (x / 2)] =
          convolve_map(gaussian_filter, map_region, window_size * window_size);
    }
  }
}

// This function is to upscale the map from in_level size to out_level size.
// Note that the map at "level-1" will upscale the map at "level" by x2.
static inline int upscale_map(const double *input, int in_level, int out_level,
                              int height[9], int width[9], double *output) {
  for (int level = in_level; level > out_level; level--) {
    const int cur_width = width[level];
    const int cur_height = height[level];
    const int cur_stride = width[level];

    double *original = (level == in_level) ? (double *)input : output;

    assert(level > 0);

    const int h_upscale = height[level - 1];
    const int w_upscale = width[level - 1];
    const int s_upscale = width[level - 1];

    double *upscale = aom_malloc(h_upscale * w_upscale * sizeof(*upscale));

    if (!upscale) {
      return 0;
    }

    for (int i = 0; i < h_upscale; ++i) {
      for (int j = 0; j < w_upscale; ++j) {
        const int ii = clamp((i >> 1), 0, cur_height - 1);
        const int jj = clamp((j >> 1), 0, cur_width - 1);
        upscale[j + i * s_upscale] = (double)original[jj + ii * cur_stride];
      }
    }
    memcpy(output, upscale, h_upscale * w_upscale * sizeof(double));
    aom_free(upscale);
  }

  return 1;
}

// This function calculates the differences between a fine scale c and a
// coarser scale s yielding the feature maps. c \in {2, 3, 4}, and s = c +
// delta, where delta \in {3, 4}.
static int center_surround_diff(const double *input[9], int height[9],
                                int width[9], saliency_feature_map *output[6]) {
  int j = 0;
  for (int k = 2; k < 5; ++k) {
    int cur_height = height[k];
    int cur_width = width[k];

    if (upscale_map(input[k + 3], k + 3, k, height, width, output[j]->buf) ==
        0) {
      return 0;
    }

    for (int r = 0; r < cur_height; ++r) {
      for (int c = 0; c < cur_width; ++c) {
        output[j]->buf[r * cur_width + c] =
            fabs((double)(input[k][r * cur_width + c] -
                          output[j]->buf[r * cur_width + c]));
      }
    }

    if (upscale_map(input[k + 4], k + 4, k, height, width,
                    output[j + 1]->buf) == 0) {
      return 0;
    }

    for (int r = 0; r < cur_height; ++r) {
      for (int c = 0; c < cur_width; ++c) {
        output[j + 1]->buf[r * cur_width + c] =
            fabs(input[k][r * cur_width + c] -
                 output[j + 1]->buf[r * cur_width + c]);
      }
    }

    j += 2;
  }
  return 1;
}

// For color channels, the differences is calculated based on "color
// double-opponency". For example, the RG feature map is constructed between a
// fine scale c of R-G component and a coarser scale s of G-R component.
static int center_surround_diff_rgb(const double *input_1[9],
                                    const double *input_2[9], int height[9],
                                    int width[9],
                                    saliency_feature_map *output[6]) {
  int j = 0;
  for (int k = 2; k < 5; ++k) {
    int cur_height = height[k];
    int cur_width = width[k];

    if (upscale_map(input_2[k + 3], k + 3, k, height, width, output[j]->buf) ==
        0) {
      return 0;
    }

    for (int r = 0; r < cur_height; ++r) {
      for (int c = 0; c < cur_width; ++c) {
        output[j]->buf[r * cur_width + c] =
            fabs((double)(input_1[k][r * cur_width + c] -
                          output[j]->buf[r * cur_width + c]));
      }
    }

    if (upscale_map(input_2[k + 4], k + 4, k, height, width,
                    output[j + 1]->buf) == 0) {
      return 0;
    }

    for (int r = 0; r < cur_height; ++r) {
      for (int c = 0; c < cur_width; ++c) {
        output[j + 1]->buf[r * cur_width + c] =
            fabs(input_1[k][r * cur_width + c] -
                 output[j + 1]->buf[r * cur_width + c]);
      }
    }

    j += 2;
  }
  return 1;
}

// This function is to generate Gaussian pyramid images with indexes from 0 to
// 8, and construct the feature maps from calculating the center-surround
// differences.
static int gaussian_pyramid(const double *src, int width[9], int height[9],
                            saliency_feature_map *dst[6]) {
  double *gaussian_map[9];  // scale = 9
  gaussian_map[0] =
      (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
  if (!gaussian_map[0]) {
    return 0;
  }

  memcpy(gaussian_map[0], src, width[0] * height[0] * sizeof(double));

  for (int i = 1; i < 9; ++i) {
    int stride = width[i - 1];
    int new_width = width[i];
    int new_height = height[i];

    gaussian_map[i] =
        (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));

    if (!gaussian_map[i]) {
      for (int l = 0; l < i; ++l) {
        aom_free(gaussian_map[l]);
      }
      return 0;
    }

    memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));

    decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
                 gaussian_map[i]);
  }

  if (center_surround_diff((const double **)gaussian_map, height, width, dst) ==
      0) {
    for (int l = 0; l < 9; ++l) {
      aom_free(gaussian_map[l]);
    }
    return 0;
  }

  for (int i = 0; i < 9; ++i) {
    aom_free(gaussian_map[i]);
  }
  return 1;
}

static int gaussian_pyramid_rgb(double *src_1, double *src_2, int width[9],
                                int height[9], saliency_feature_map *dst[6]) {
  double *gaussian_map[2][9];  // scale = 9
  double *src[2];

  src[0] = src_1;
  src[1] = src_2;

  for (int k = 0; k < 2; ++k) {
    gaussian_map[k][0] = (double *)aom_malloc(width[0] * height[0] *
                                              sizeof(*gaussian_map[k][0]));
    if (!gaussian_map[k][0]) {
      for (int l = 0; l < k; ++l) {
        aom_free(gaussian_map[l][0]);
      }
      return 0;
    }
    memcpy(gaussian_map[k][0], src[k], width[0] * height[0] * sizeof(double));

    for (int i = 1; i < 9; ++i) {
      int stride = width[i - 1];
      int new_width = width[i];
      int new_height = height[i];

      gaussian_map[k][i] = (double *)aom_malloc(new_width * new_height *
                                                sizeof(*gaussian_map[k][i]));
      if (!gaussian_map[k][i]) {
        for (int l = 0; l < k; ++l) {
          aom_free(gaussian_map[l][i]);
        }
        return 0;
      }
      memset(gaussian_map[k][i], 0, new_width * new_height * sizeof(double));
      decimate_map(gaussian_map[k][i - 1], height[i - 1], width[i - 1], stride,
                   gaussian_map[k][i]);
    }
  }

  if (center_surround_diff_rgb((const double **)gaussian_map[0],
                               (const double **)gaussian_map[1], height, width,
                               dst) == 0) {
    for (int l = 0; l < 2; ++l) {
      for (int i = 0; i < 9; ++i) {
        aom_free(gaussian_map[l][i]);
      }
    }
    return 0;
  }

  for (int l = 0; l < 2; ++l) {
    for (int i = 0; i < 9; ++i) {
      aom_free(gaussian_map[l][i]);
    }
  }
  return 1;
}

static int get_feature_map_intensity(double *intensity, int width[9],
                                     int height[9],
                                     saliency_feature_map *i_map[6]) {
  if (gaussian_pyramid(intensity, width, height, i_map) == 0) {
    return 0;
  }
  return 1;
}

static int get_feature_map_rgb(double *cr, double *cg, double *cb, int width[9],
                               int height[9], saliency_feature_map *rg_map[6],
                               saliency_feature_map *by_map[6]) {
  double *rg_mat = aom_malloc(height[0] * width[0] * sizeof(*rg_mat));
  double *by_mat = aom_malloc(height[0] * width[0] * sizeof(*by_mat));
  double *gr_mat = aom_malloc(height[0] * width[0] * sizeof(*gr_mat));
  double *yb_mat = aom_malloc(height[0] * width[0] * sizeof(*yb_mat));

  if (!rg_mat || !by_mat || !gr_mat || !yb_mat) {
    aom_free(rg_mat);
    aom_free(by_mat);
    aom_free(gr_mat);
    aom_free(yb_mat);
    return 0;
  }

  double r, g, b, y;
  for (int i = 0; i < height[0]; ++i) {
    for (int j = 0; j < width[0]; ++j) {
      r = AOMMAX(0, cr[i * width[0] + j] -
                        (cg[i * width[0] + j] + cb[i * width[0] + j]) / 2);
      g = AOMMAX(0, cg[i * width[0] + j] -
                        (cr[i * width[0] + j] + cb[i * width[0] + j]) / 2);
      b = AOMMAX(0, cb[i * width[0] + j] -
                        (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2);
      y = AOMMAX(0, (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2 -
                        fabs(cr[i * width[0] + j] - cg[i * width[0] + j]) / 2 -
                        cb[i * width[0] + j]);

      rg_mat[i * width[0] + j] = r - g;
      by_mat[i * width[0] + j] = b - y;
      gr_mat[i * width[0] + j] = g - r;
      yb_mat[i * width[0] + j] = y - b;
    }
  }

  if (gaussian_pyramid_rgb(rg_mat, gr_mat, width, height, rg_map) == 0 ||
      gaussian_pyramid_rgb(by_mat, yb_mat, width, height, by_map) == 0) {
    aom_free(rg_mat);
    aom_free(by_mat);
    aom_free(gr_mat);
    aom_free(yb_mat);
    return 0;
  }

  aom_free(rg_mat);
  aom_free(by_mat);
  aom_free(gr_mat);
  aom_free(yb_mat);
  return 1;
}

static inline void filter2d(const double *input, const double kernel[9][9],
                            int width, int height, double *output) {
  const int window_size = 9;
  double map_section[81];
  for (int y = 0; y <= height - 1; ++y) {
    for (int x = 0; x <= width - 1; ++x) {
      int i = 0;
      for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
        for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
          int yvalue = clamp(yy, 0, height - 1);
          int xvalue = clamp(xx, 0, width - 1);
          map_section[i++] = input[yvalue * width + xvalue];
        }
      }

      output[y * width + x] = 0;
      for (int k = 0; k < window_size; ++k) {
        for (int l = 0; l < window_size; ++l) {
          output[y * width + x] +=
              kernel[k][l] * map_section[k * window_size + l];
        }
      }
    }
  }
}

static int get_feature_map_orientation(const double *intensity, int width[9],
                                       int height[9],
                                       saliency_feature_map *dst[24]) {
  double *gaussian_map[9];

  gaussian_map[0] =
      (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
  if (!gaussian_map[0]) {
    return 0;
  }
  memcpy(gaussian_map[0], intensity, width[0] * height[0] * sizeof(double));

  for (int i = 1; i < 9; ++i) {
    int stride = width[i - 1];
    int new_width = width[i];
    int new_height = height[i];

    gaussian_map[i] =
        (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));
    if (!gaussian_map[i]) {
      for (int l = 0; l < i; ++l) {
        aom_free(gaussian_map[l]);
      }
      return 0;
    }
    memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));
    decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
                 gaussian_map[i]);
  }

  double *tempGaborOutput[4][9];  //[angle: 0, 45, 90, 135 degree][filter_size]

  for (int i = 2; i < 9; ++i) {
    const int cur_height = height[i];
    const int cur_width = width[i];
    for (int j = 0; j < 4; ++j) {
      tempGaborOutput[j][i] = (double *)aom_malloc(
          cur_height * cur_width * sizeof(*tempGaborOutput[j][i]));
      if (!tempGaborOutput[j][i]) {
        for (int l = 0; l < 9; ++l) {
          aom_free(gaussian_map[l]);
        }
        for (int h = 0; h < 4; ++h) {
          for (int g = 2; g < 9; ++g) {
            aom_free(tempGaborOutput[h][g]);
          }
        }
        return 0;
      }
      filter2d(gaussian_map[i], kGaborFilter[j], cur_width, cur_height,
               tempGaborOutput[j][i]);
    }
  }

  for (int i = 0; i < 9; ++i) {
    aom_free(gaussian_map[i]);
  }

  saliency_feature_map
      *tmp[4][6];  //[angle: 0, 45, 90, 135 degree][filter_size]

  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 4; ++j) {
      tmp[j][i] = dst[j * 6 + i];
    }
  }

  for (int j = 0; j < 4; ++j) {
    if (center_surround_diff((const double **)tempGaborOutput[j], height, width,
                             tmp[j]) == 0) {
      for (int h = 0; h < 4; ++h) {
        for (int g = 2; g < 9; ++g) {
          aom_free(tempGaborOutput[h][g]);
        }
      }
      return 0;
    }
  }

  for (int i = 2; i < 9; ++i) {
    for (int j = 0; j < 4; ++j) {
      aom_free(tempGaborOutput[j][i]);
    }
  }

  return 1;
}

static inline void find_min_max(const saliency_feature_map *input,
                                double *max_value, double *min_value) {
  assert(input && input->buf);
  *min_value = DBL_MAX;
  *max_value = 0.0;

  for (int i = 0; i < input->height; ++i) {
    for (int j = 0; j < input->width; ++j) {
      assert(input->buf[i * input->width + j] >= 0.0);
      *min_value = fmin(input->buf[i * input->width + j], *min_value);
      *max_value = fmax(input->buf[i * input->width + j], *max_value);
    }
  }
}

static inline double average_local_max(const saliency_feature_map *input,
                                       int stepsize) {
  int numlocal = 0;
  double lmaxmean = 0, lmax = 0, dummy = 0;
  saliency_feature_map local_map;
  local_map.height = stepsize;
  local_map.width = stepsize;
  local_map.buf =
      (double *)aom_malloc(stepsize * stepsize * sizeof(*local_map.buf));

  if (!local_map.buf) {
    return -1;
  }

  for (int y = 0; y < input->height - stepsize; y += stepsize) {
    for (int x = 0; x < input->width - stepsize; x += stepsize) {
      for (int i = 0; i < stepsize; ++i) {
        for (int j = 0; j < stepsize; ++j) {
          local_map.buf[i * stepsize + j] =
              input->buf[(y + i) * input->width + x + j];
        }
      }

      find_min_max(&local_map, &lmax, &dummy);
      lmaxmean += lmax;
      numlocal++;
    }
  }

  aom_free(local_map.buf);

  return lmaxmean / numlocal;
}

// Linear normalization the values in the map to [0,1].
static void minmax_normalize(saliency_feature_map *input) {
  double max_value, min_value;
  find_min_max(input, &max_value, &min_value);

  for (int i = 0; i < input->height; ++i) {
    for (int j = 0; j < input->width; ++j) {
      if (max_value != min_value) {
        input->buf[i * input->width + j] =
            input->buf[i * input->width + j] / (max_value - min_value) +
            min_value / (min_value - max_value);
      } else {
        input->buf[i * input->width + j] -= min_value;
      }
    }
  }
}

// This function is to promote meaningful “activation spots” in the map and
// ignores homogeneous areas.
static int nomalization_operator(saliency_feature_map *input, int stepsize) {
  minmax_normalize(input);
  double lmaxmean = average_local_max(input, stepsize);
  if (lmaxmean < 0) {
    return 0;
  }
  double normCoeff = (1 - lmaxmean) * (1 - lmaxmean);

  for (int i = 0; i < input->height; ++i) {
    for (int j = 0; j < input->width; ++j) {
      input->buf[i * input->width + j] *= normCoeff;
    }
  }

  return 1;
}

// Normalize the values in feature maps to [0,1], and then upscale all maps to
// the original frame size.
static int normalize_fm(saliency_feature_map *input[6], int width[9],
                        int height[9], int num_fm,
                        saliency_feature_map *output[6]) {
  // Feature maps (FM) are generated by function "center_surround_diff()". The
  // difference is between a fine scale c and a coarser scale s, where c \in {2,
  // 3, 4}, and s = c + delta, where delta \in {3, 4}, and the FM size is scale
  // c. Specifically, i=0: c=2 and s=5, i=1: c=2 and s=6, i=2: c=3 and s=6, i=3:
  // c=3 and s=7, i=4: c=4 and s=7, i=5: c=4 and s=8.
  for (int i = 0; i < num_fm; ++i) {
    if (nomalization_operator(input[i], 8) == 0) {
      return 0;
    }

    // Upscale FM to original frame size
    if (upscale_map(input[i]->buf, (i / 2) + 2, 0, height, width,
                    output[i]->buf) == 0) {
      return 0;
    }
  }
  return 1;
}

// Combine feature maps with the same category (intensity, color, or
// orientation) into one conspicuity map.
static int normalized_map(saliency_feature_map *input[6], int width[9],
                          int height[9], saliency_feature_map *output) {
  int num_fm = 6;

  saliency_feature_map *n_input[6];
  for (int i = 0; i < 6; ++i) {
    n_input[i] = (saliency_feature_map *)aom_malloc(sizeof(*n_input[i]));
    if (!n_input[i]) {
      return 0;
    }
    n_input[i]->buf =
        (double *)aom_malloc(width[0] * height[0] * sizeof(*n_input[i]->buf));
    if (!n_input[i]->buf) {
      aom_free(n_input[i]);
      return 0;
    }
    n_input[i]->height = height[0];
    n_input[i]->width = width[0];
  }

  if (normalize_fm(input, width, height, num_fm, n_input) == 0) {
    for (int i = 0; i < num_fm; ++i) {
      aom_free(n_input[i]->buf);
      aom_free(n_input[i]);
    }
    return 0;
  }

  // Add up all normalized feature maps with the same category into one map.
  for (int i = 0; i < num_fm; ++i) {
    for (int r = 0; r < height[0]; ++r) {
      for (int c = 0; c < width[0]; ++c) {
        output->buf[r * width[0] + c] += n_input[i]->buf[r * width[0] + c];
      }
    }
  }

  for (int i = 0; i < num_fm; ++i) {
    aom_free(n_input[i]->buf);
    aom_free(n_input[i]);
  }

  nomalization_operator(output, 8);
  return 1;
}

static int normalized_map_rgb(saliency_feature_map *rg_map[6],
                              saliency_feature_map *by_map[6], int width[9],
                              int height[9], saliency_feature_map *output) {
  saliency_feature_map *color_cm[2];  // 0: color_cm_rg, 1: color_cm_by
  for (int i = 0; i < 2; ++i) {
    color_cm[i] = aom_malloc(sizeof(*color_cm[i]));
    if (!color_cm[i]) {
      return 0;
    }
    color_cm[i]->buf =
        (double *)aom_malloc(width[0] * height[0] * sizeof(*color_cm[i]->buf));
    if (!color_cm[i]->buf) {
      for (int l = 0; l < i; ++l) {
        aom_free(color_cm[l]->buf);
      }
      aom_free(color_cm[i]);
      return 0;
    }

    color_cm[i]->width = width[0];
    color_cm[i]->height = height[0];
    memset(color_cm[i]->buf, 0,
           width[0] * height[0] * sizeof(*color_cm[i]->buf));
  }

  if (normalized_map(rg_map, width, height, color_cm[0]) == 0 ||
      normalized_map(by_map, width, height, color_cm[1]) == 0) {
    for (int i = 0; i < 2; ++i) {
      aom_free(color_cm[i]->buf);
      aom_free(color_cm[i]);
    }
    return 0;
  }

  for (int r = 0; r < height[0]; ++r) {
    for (int c = 0; c < width[0]; ++c) {
      output->buf[r * width[0] + c] = color_cm[0]->buf[r * width[0] + c] +
                                      color_cm[1]->buf[r * width[0] + c];
    }
  }

  for (int i = 0; i < 2; ++i) {
    aom_free(color_cm[i]->buf);
    aom_free(color_cm[i]);
  }

  nomalization_operator(output, 8);
  return 1;
}

static int normalized_map_orientation(saliency_feature_map *orientation_map[24],
                                      int width[9], int height[9],
                                      saliency_feature_map *output) {
  int num_fms_per_angle = 6;

  saliency_feature_map *ofm[4][6];
  for (int i = 0; i < num_fms_per_angle; ++i) {
    for (int j = 0; j < 4; ++j) {
      ofm[j][i] = orientation_map[j * num_fms_per_angle + i];
    }
  }

  // extract conspicuity map for each angle
  saliency_feature_map *nofm = aom_malloc(sizeof(*nofm));
  if (!nofm) {
    return 0;
  }
  nofm->buf = (double *)aom_malloc(width[0] * height[0] * sizeof(*nofm->buf));
  if (!nofm->buf) {
    aom_free(nofm);
    return 0;
  }
  nofm->height = height[0];
  nofm->width = width[0];

  for (int i = 0; i < 4; ++i) {
    memset(nofm->buf, 0, width[0] * height[0] * sizeof(*nofm->buf));
    if (normalized_map(ofm[i], width, height, nofm) == 0) {
      aom_free(nofm->buf);
      aom_free(nofm);
      return 0;
    }

    for (int r = 0; r < height[0]; ++r) {
      for (int c = 0; c < width[0]; ++c) {
        output->buf[r * width[0] + c] += nofm->buf[r * width[0] + c];
      }
    }
  }

  aom_free(nofm->buf);
  aom_free(nofm);

  nomalization_operator(output, 8);
  return 1;
}

// Set pixel level saliency mask based on Itti-Koch algorithm
int av1_set_saliency_map(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;

  int frm_width = cm->width;
  int frm_height = cm->height;

  int pyr_height[9];
  int pyr_width[9];

  pyr_height[0] = frm_height;
  pyr_width[0] = frm_width;

  for (int i = 1; i < 9; ++i) {
    pyr_width[i] = pyr_width[i - 1] / 2;
    pyr_height[i] = pyr_height[i - 1] / 2;
  }

  double *cr = aom_malloc(frm_width * frm_height * sizeof(*cr));
  double *cg = aom_malloc(frm_width * frm_height * sizeof(*cg));
  double *cb = aom_malloc(frm_width * frm_height * sizeof(*cb));
  double *intensity = aom_malloc(frm_width * frm_height * sizeof(*intensity));

  if (!cr || !cg || !cb || !intensity) {
    aom_free(cr);
    aom_free(cg);
    aom_free(cb);
    aom_free(intensity);
    return 0;
  }

  // Extract red / green / blue channels and intensity component
  get_color_intensity(cpi->source, cm->seq_params->subsampling_x,
                      cm->seq_params->subsampling_y, cr, cg, cb, intensity);

  // Feature Map Extraction
  // intensity map
  saliency_feature_map *i_map[6];
  for (int i = 0; i < 6; ++i) {
    int cur_height = pyr_height[(i / 2) + 2];
    int cur_width = pyr_width[(i / 2) + 2];

    i_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*i_map[i]));
    if (!i_map[i]) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < i; ++l) {
        aom_free(i_map[l]);
      }
      return 0;
    }
    i_map[i]->buf =
        (double *)aom_malloc(cur_height * cur_width * sizeof(*i_map[i]->buf));
    if (!i_map[i]->buf) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < i; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(i_map[l]);
      }
      return 0;
    }
    i_map[i]->height = cur_height;
    i_map[i]->width = cur_width;
  }

  if (get_feature_map_intensity(intensity, pyr_width, pyr_height, i_map) == 0) {
    aom_free(cr);
    aom_free(cg);
    aom_free(cb);
    aom_free(intensity);
    for (int l = 0; l < 6; ++l) {
      aom_free(i_map[l]->buf);
      aom_free(i_map[l]);
    }
    return 0;
  }

  // RGB map
  saliency_feature_map *rg_map[6], *by_map[6];
  for (int i = 0; i < 6; ++i) {
    int cur_height = pyr_height[(i / 2) + 2];
    int cur_width = pyr_width[(i / 2) + 2];
    rg_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*rg_map[i]));
    by_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*by_map[i]));
    if (!rg_map[i] || !by_map[i]) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(i_map[l]);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }
      return 0;
    }
    rg_map[i]->buf =
        (double *)aom_malloc(cur_height * cur_width * sizeof(*rg_map[i]->buf));
    by_map[i]->buf =
        (double *)aom_malloc(cur_height * cur_width * sizeof(*by_map[i]->buf));
    if (!by_map[i]->buf || !rg_map[i]->buf) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(i_map[l]);
      }
      for (int l = 0; l < i; ++l) {
        aom_free(rg_map[l]->buf);
        aom_free(by_map[l]->buf);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }
      return 0;
    }
    rg_map[i]->height = cur_height;
    rg_map[i]->width = cur_width;
    by_map[i]->height = cur_height;
    by_map[i]->width = cur_width;
  }

  if (get_feature_map_rgb(cr, cg, cb, pyr_width, pyr_height, rg_map, by_map) ==
      0) {
    aom_free(cr);
    aom_free(cg);
    aom_free(cb);
    aom_free(intensity);
    for (int l = 0; l < 6; ++l) {
      aom_free(i_map[l]->buf);
      aom_free(rg_map[l]->buf);
      aom_free(by_map[l]->buf);
      aom_free(i_map[l]);
      aom_free(rg_map[l]);
      aom_free(by_map[l]);
    }
    return 0;
  }

  // Orientation map
  saliency_feature_map *orientation_map[24];
  for (int i = 0; i < 24; ++i) {
    int cur_height = pyr_height[((i % 6) / 2) + 2];
    int cur_width = pyr_width[((i % 6) / 2) + 2];

    orientation_map[i] =
        (saliency_feature_map *)aom_malloc(sizeof(*orientation_map[i]));
    if (!orientation_map[i]) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(rg_map[l]->buf);
        aom_free(by_map[l]->buf);
        aom_free(i_map[l]);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }
      for (int h = 0; h < i; ++h) {
        aom_free(orientation_map[h]);
      }
      return 0;
    }

    orientation_map[i]->buf = (double *)aom_malloc(
        cur_height * cur_width * sizeof(*orientation_map[i]->buf));
    if (!orientation_map[i]->buf) {
      aom_free(cr);
      aom_free(cg);
      aom_free(cb);
      aom_free(intensity);
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(rg_map[l]->buf);
        aom_free(by_map[l]->buf);
        aom_free(i_map[l]);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }

      for (int h = 0; h < i; ++h) {
        aom_free(orientation_map[h]->buf);
        aom_free(orientation_map[h]->buf);
        aom_free(orientation_map[h]);
        aom_free(orientation_map[h]);
      }
      return 0;
    }

    orientation_map[i]->height = cur_height;
    orientation_map[i]->width = cur_width;
  }

  if (get_feature_map_orientation(intensity, pyr_width, pyr_height,
                                  orientation_map) == 0) {
    aom_free(cr);
    aom_free(cg);
    aom_free(cb);
    aom_free(intensity);
    for (int l = 0; l < 6; ++l) {
      aom_free(i_map[l]->buf);
      aom_free(rg_map[l]->buf);
      aom_free(by_map[l]->buf);
      aom_free(i_map[l]);
      aom_free(rg_map[l]);
      aom_free(by_map[l]);
    }
    for (int h = 0; h < 24; ++h) {
      aom_free(orientation_map[h]->buf);
      aom_free(orientation_map[h]);
    }
    return 0;
  }

  aom_free(cr);
  aom_free(cg);
  aom_free(cb);
  aom_free(intensity);

  saliency_feature_map
      *normalized_maps[3];  // 0: intensity, 1: color, 2: orientation

  for (int i = 0; i < 3; ++i) {
    normalized_maps[i] = aom_malloc(sizeof(*normalized_maps[i]));
    if (!normalized_maps[i]) {
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(rg_map[l]->buf);
        aom_free(by_map[l]->buf);
        aom_free(i_map[l]);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }

      for (int h = 0; h < 24; ++h) {
        aom_free(orientation_map[h]->buf);
        aom_free(orientation_map[h]);
      }

      for (int l = 0; l < i; ++l) {
        aom_free(normalized_maps[l]);
      }
      return 0;
    }
    normalized_maps[i]->buf = (double *)aom_malloc(
        frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
    if (!normalized_maps[i]->buf) {
      for (int l = 0; l < 6; ++l) {
        aom_free(i_map[l]->buf);
        aom_free(rg_map[l]->buf);
        aom_free(by_map[l]->buf);
        aom_free(i_map[l]);
        aom_free(rg_map[l]);
        aom_free(by_map[l]);
      }
      for (int h = 0; h < 24; ++h) {
        aom_free(orientation_map[h]->buf);
        aom_free(orientation_map[h]);
      }
      for (int l = 0; l < i; ++l) {
        aom_free(normalized_maps[l]->buf);
        aom_free(normalized_maps[l]);
      }
      return 0;
    }
    normalized_maps[i]->width = frm_width;
    normalized_maps[i]->height = frm_height;
    memset(normalized_maps[i]->buf, 0,
           frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
  }

  // Conspicuity map generation
  if (normalized_map(i_map, pyr_width, pyr_height, normalized_maps[0]) == 0 ||
      normalized_map_rgb(rg_map, by_map, pyr_width, pyr_height,
                         normalized_maps[1]) == 0 ||
      normalized_map_orientation(orientation_map, pyr_width, pyr_height,
                                 normalized_maps[2]) == 0) {
    for (int i = 0; i < 6; ++i) {
      aom_free(i_map[i]->buf);
      aom_free(rg_map[i]->buf);
      aom_free(by_map[i]->buf);
      aom_free(i_map[i]);
      aom_free(rg_map[i]);
      aom_free(by_map[i]);
    }

    for (int i = 0; i < 24; ++i) {
      aom_free(orientation_map[i]->buf);
      aom_free(orientation_map[i]);
    }

    for (int i = 0; i < 3; ++i) {
      aom_free(normalized_maps[i]->buf);
      aom_free(normalized_maps[i]);
    }
    return 0;
  }

  for (int i = 0; i < 6; ++i) {
    aom_free(i_map[i]->buf);
    aom_free(rg_map[i]->buf);
    aom_free(by_map[i]->buf);
    aom_free(i_map[i]);
    aom_free(rg_map[i]);
    aom_free(by_map[i]);
  }

  for (int i = 0; i < 24; ++i) {
    aom_free(orientation_map[i]->buf);
    aom_free(orientation_map[i]);
  }

  // Pixel level saliency map
  saliency_feature_map *combined_saliency_map =
      aom_malloc(sizeof(*combined_saliency_map));
  if (!combined_saliency_map) {
    for (int i = 0; i < 3; ++i) {
      aom_free(normalized_maps[i]->buf);
      aom_free(normalized_maps[i]);
    }
    return 0;
  }

  combined_saliency_map->buf = (double *)aom_malloc(
      frm_width * frm_height * sizeof(*combined_saliency_map->buf));
  if (!combined_saliency_map->buf) {
    for (int i = 0; i < 3; ++i) {
      aom_free(normalized_maps[i]->buf);
      aom_free(normalized_maps[i]);
    }

    aom_free(combined_saliency_map);
    return 0;
  }
  combined_saliency_map->height = frm_height;
  combined_saliency_map->width = frm_width;

  double w_intensity, w_color, w_orient;

  w_intensity = w_color = w_orient = (double)1 / 3;

  for (int r = 0; r < frm_height; ++r) {
    for (int c = 0; c < frm_width; ++c) {
      combined_saliency_map->buf[r * frm_width + c] =
          (w_intensity * normalized_maps[0]->buf[r * frm_width + c] +
           w_color * normalized_maps[1]->buf[r * frm_width + c] +
           w_orient * normalized_maps[2]->buf[r * frm_width + c]);
    }
  }

  for (int r = 0; r < frm_height; ++r) {
    for (int c = 0; c < frm_width; ++c) {
      int index = r * frm_width + c;
      cpi->saliency_map[index] =
          (uint8_t)(combined_saliency_map->buf[index] * 255);
    }
  }

  for (int i = 0; i < 3; ++i) {
    aom_free(normalized_maps[i]->buf);
    aom_free(normalized_maps[i]);
  }

  aom_free(combined_saliency_map->buf);
  aom_free(combined_saliency_map);

  return 1;
}

// Set superblock level saliency mask for rdmult scaling
int av1_setup_sm_rdmult_scaling_factor(AV1_COMP *cpi, double motion_ratio) {
  AV1_COMMON *cm = &cpi->common;

  saliency_feature_map *sb_saliency_map =
      aom_malloc(sizeof(saliency_feature_map));

  if (sb_saliency_map == NULL) {
    return 0;
  }

  const BLOCK_SIZE bsize = cm->seq_params->sb_size;
  const int num_mi_w = mi_size_wide[bsize];
  const int num_mi_h = mi_size_high[bsize];
  const int block_width = block_size_wide[bsize];
  const int block_height = block_size_high[bsize];
  const int num_sb_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_sb_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;

  sb_saliency_map->height = num_sb_rows;
  sb_saliency_map->width = num_sb_cols;
  sb_saliency_map->buf = (double *)aom_malloc(num_sb_rows * num_sb_cols *
                                              sizeof(*sb_saliency_map->buf));

  if (sb_saliency_map->buf == NULL) {
    aom_free(sb_saliency_map);
    return 0;
  }

  for (int row = 0; row < num_sb_rows; ++row) {
    for (int col = 0; col < num_sb_cols; ++col) {
      const int index = row * num_sb_cols + col;
      double total_pixel = 0;
      double total_weight = 0;

      for (int i = 0; i < block_height; i++) {
        for (int j = 0; j < block_width; j++) {
          if ((row * block_height + i) >= cpi->common.height ||
              (col * block_width + j) >= cpi->common.width)
            continue;
          total_pixel++;
          total_weight +=
              cpi->saliency_map[(row * block_height + i) * cpi->common.width +
                                col * block_width + j];
        }
      }

      assert(total_pixel > 0);

      // Calculate the superblock level saliency map from pixel level saliency
      // map
      sb_saliency_map->buf[index] = total_weight / total_pixel;

      // Further lower the superblock saliency score for boundary superblocks.
      if (row < 1 || row > num_sb_rows - 2 || col < 1 ||
          col > num_sb_cols - 2) {
        sb_saliency_map->buf[index] /= 5;
      }
    }
  }

  // superblock level saliency map finalization
  minmax_normalize(sb_saliency_map);

  double log_sum = 0.0;
  double sum = 0.0;
  int block_count = 0;

  // Calculate the average superblock sm_scaling_factor for a frame, to be used
  // for clamping later.
  for (int row = 0; row < num_sb_rows; ++row) {
    for (int col = 0; col < num_sb_cols; ++col) {
      const int index = row * num_sb_cols + col;
      const double saliency = sb_saliency_map->buf[index];

      cpi->sm_scaling_factor[index] = 1 - saliency;
      sum += cpi->sm_scaling_factor[index];
      block_count++;
    }
  }
  assert(block_count > 0);
  sum /= block_count;

  // Calculate the geometric mean of superblock sm_scaling_factor for a frame,
  // to be used for normalization.
  for (int row = 0; row < num_sb_rows; ++row) {
    for (int col = 0; col < num_sb_cols; ++col) {
      const int index = row * num_sb_cols + col;
      log_sum += log(fmax(cpi->sm_scaling_factor[index], 0.001));
      cpi->sm_scaling_factor[index] =
          fmax(cpi->sm_scaling_factor[index], 0.8 * sum);
    }
  }

  log_sum = exp(log_sum / block_count);

  // Normalize the sm_scaling_factor by geometric mean.
  for (int row = 0; row < num_sb_rows; ++row) {
    for (int col = 0; col < num_sb_cols; ++col) {
      const int index = row * num_sb_cols + col;
      assert(log_sum > 0);
      cpi->sm_scaling_factor[index] /= log_sum;

      // Modulate the sm_scaling_factor by frame basis motion factor
      cpi->sm_scaling_factor[index] =
          cpi->sm_scaling_factor[index] * motion_ratio;
    }
  }

  aom_free(sb_saliency_map->buf);
  aom_free(sb_saliency_map);
  return 1;
}

// av1_setup_motion_ratio() is only enabled when CONFIG_REALTIME_ONLY is 0,
// because the computations need to access the first pass stats which are
// only available when CONFIG_REALTIME_ONLY is equal to 0.
#if !CONFIG_REALTIME_ONLY
// Set motion_ratio that reflects the motion quantities between two consecutive
// frames. Motion_ratio will be used to set up saliency_map based rdmult scaling
// factor, i.e., the less the motion quantities are, the more bits will be spent
// on this frame, and vice versa.
double av1_setup_motion_ratio(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  int frames_since_key =
      cm->current_frame.display_order_hint - cpi->rc.frames_since_key;
  const FIRSTPASS_STATS *cur_stats = av1_firstpass_info_peek(
      &cpi->ppi->twopass.firstpass_info, frames_since_key);
  assert(cur_stats != NULL);
  assert(cpi->ppi->twopass.firstpass_info.total_stats.count > 0);

  const double avg_intra_error =
      exp(cpi->ppi->twopass.firstpass_info.total_stats.log_intra_error /
          cpi->ppi->twopass.firstpass_info.total_stats.count);
  const double avg_inter_error =
      exp(cpi->ppi->twopass.firstpass_info.total_stats.log_coded_error /
          cpi->ppi->twopass.firstpass_info.total_stats.count);

  double inter_error = cur_stats->coded_error;
  double error_stdev = 0;
  const double avg_error =
      cpi->ppi->twopass.firstpass_info.total_stats.intra_error /
      cpi->ppi->twopass.firstpass_info.total_stats.count;
  for (int i = 0; i < cpi->ppi->twopass.firstpass_info.total_stats.count; i++) {
    const FIRSTPASS_STATS *stats =
        &cpi->ppi->twopass.firstpass_info.stats_buf[i];
    error_stdev +=
        (stats->intra_error - avg_error) * (stats->intra_error - avg_error);
  }
  error_stdev =
      sqrt(error_stdev / cpi->ppi->twopass.firstpass_info.total_stats.count);

  double motion_ratio = 1;
  if (error_stdev / fmax(avg_intra_error, 1) > 0.1) {
    motion_ratio = inter_error / fmax(1, avg_inter_error);
    motion_ratio = AOMMIN(motion_ratio, 1.5);
    motion_ratio = AOMMAX(motion_ratio, 0.8);
  }

  return motion_ratio;
}
#endif  // !CONFIG_REALTIME_ONLY

Messung V0.5

¤ Dauer der Verarbeitung: 0.22 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.