Quelle cnn.h

Sprache: C

/*
* Copyright (c) 2019, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#ifndef AOM_AV1_ENCODER_CNN_H_
#define AOM_AV1_ENCODER_CNN_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <math.h>
#include <stdbool.h>

#include "aom_util/aom_thread.h"
#include "config/av1_rtcd.h"

struct AV1Common;

#define CNN_MAX_HIDDEN_LAYERS 64
#define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1)
#define CNN_MAX_CHANNELS 256
#define CNN_MAX_BRANCHES 4
#define CNN_MAX_THREADS 32

#define NO_BRANCH_CONFIG \
  { 0, 0, 0 }
#define NO_BN_PARAMS \
  { NULL, NULL, NULL, NULL }

enum {
  PADDING_SAME_ZERO,       // tensorflow's SAME padding with pixels outside
                           // the image area assumed to be 0 (default)
  PADDING_SAME_REPLICATE,  // tensorflow's SAME padding with pixels outside
                           // the image area replicated from closest edge
  PADDING_VALID            // tensorflow's VALID padding
} UENUM1BYTE(PADDING_TYPE);

// enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION);

// Times when input tensor may be copied to branches given in input_to_branches.
// BRANCH_NO_COPY: doesn't copy any tensor.
// BRANCH_INPUT: copies the input tensor to branches.
// BRANCH_OUTPUT: copies the convolved tensor to branches.
// BRANCH_COMBINED: copies the combined (after convolving and branch combining)
//   tensor. If no combinations happen at this layer, then this option
//   has the same effect as COPY_OUTPUT.
enum {
  BRANCH_NO_COPY,
  BRANCH_INPUT,
  BRANCH_OUTPUT,
  BRANCH_COMBINED
} UENUM1BYTE(BRANCH_COPY);

// Types of combining branches with output of current layer:
// BRANCH_NOC: no branch combining
// BRANCH_ADD: Add previously stored branch tensor to output of layer
// BRANCH_CAT: Concatenate branch tensor to output of layer
enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE);

// The parameters used to scale each channel in batch
// normalization. The processing in done on a per-channel basis.
// e.g. bn_mean[c] is the mean for all pixels in channel c. This
// is always applied after activation. The output is given by
// out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where
// norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c]
// here we assume that the effect of variance_epsilon is already
// taken into account when bn_std is calculated. The pointers
// needs to be either all zero or all valid. If all zero, then
// batchnorm is disabled, else batchnorm is applied.
struct CNN_BATCHNORM_PARAMS {
  const float *bn_gamma;
  const float *bn_beta;
  const float *bn_mean;
  const float *bn_std;
};

struct CNN_BRANCH_CONFIG {
  int input_to_branches;  // If nonzero, copy the active tensor to the current
  // layer and store for future use in branches
  // specified in the field as a binary mask. For
  // example, if input_to_branch = 0x06, it means the
  // input tensor to the current branch is copied to
  // branches 1 and 2 (where 0 represents the primary
  // branch). One restriction is that the mask
  // cannot indicate copying to the current branch.
  // If greater than 0, only copies the channels up
  // to the given index.
  int channels_to_copy;  // Within the layer, input a copy of active
  // tensor to branches given in input_to_branches.
  int branches_to_combine;  // mask of branches to combine with output of
  // current layer, if
  // branch_combine_type != BRANCH_NOC
  // For example, if branches_to_combine = 0x0A,
  // it means that braches 1 and 3 are combined
  // with the current branch.
};

struct CNN_LAYER_CONFIG {
  int in_channels;
  int filter_width;
  int filter_height;
  int out_channels;
  int skip_width;
  int skip_height;
  int maxpool;            // whether to use maxpool or not (only effective when
                          // skip width or skip_height are > 1)
  const float *weights;   // array of length filter_height x filter_width x
                          // in_channels x out_channels where the inner-most
                          // scan is out_channels and the outer most scan is
                          // filter_height.
  const float *bias;      // array of length out_channels
  PADDING_TYPE pad;       // padding type
  ACTIVATION activation;  // the activation function to use after convolution
  int deconvolve;         // whether this is a deconvolution layer.
                          // 0: If skip_width or skip_height are > 1, then we
                          // reduce resolution
                          // 1: If skip_width or skip_height are > 1, then we
                          // increase resolution
  int branch;             // branch index in [0, CNN_MAX_BRANCHES - 1], where
                          // 0 refers to the primary branch.
  BRANCH_COPY branch_copy_type;
  BRANCH_COMBINE branch_combine_type;
  struct CNN_BRANCH_CONFIG branch_config;
  struct CNN_BATCHNORM_PARAMS
      bn_params;   // A struct that contains the parameters
                   // used for batch normalization.
  int output_num;  // The output buffer idx to which the layer output is
                   // written. Set to -1 to disable writing it to the output. In
                   // the case that branch_combine_type is BRANCH_CAT, all
                   // concatenated channels will be written to output. In the
                   // case of BRANCH_ADD, the output will be the result of
                   // summation.
};

struct CNN_CONFIG {
  int num_layers;  // number of CNN layers ( = number of hidden layers + 1)
  int is_residue;  // whether the output activation is a residue
  int ext_width, ext_height;  // extension horizontally and vertically
  int strict_bounds;          // whether the input bounds are strict or not.
                              // If strict, the extension area is filled by
                              // replication; if not strict, image data is
                              // assumed available beyond the bounds.
  CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS];
};

struct CNN_THREAD_DATA {
  int num_workers;
  AVxWorker *workers;
};

struct CNN_MULTI_OUT {
  int num_outputs;
  const int *output_channels;
  const int *output_strides;
  float **output_buffer;
};

// Function to return size of output
void av1_find_cnn_output_size(int in_width, int in_height,
                              const CNN_CONFIG *cnn_config, int *out_width,
                              int *out_height, int *out_channels);

// Function to return output width and output height of given layer.
void av1_find_cnn_layer_output_size(int in_width, int in_height,
                                    const CNN_LAYER_CONFIG *layer_config,
                                    int *out_width, int *out_height);

// Prediction functions from set of input image buffers. This function supports
// CNN with multiple outputs.
bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
                                   int stride, const CNN_CONFIG *cnn_config,
                                   const CNN_THREAD_DATA *thread_data,
                                   struct CNN_MULTI_OUT *output);
bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
                                          int stride,
                                          const CNN_CONFIG *cnn_config,
                                          const CNN_THREAD_DATA *thread_data,
                                          int bit_depth, CNN_MULTI_OUT *output);
#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // AOM_AV1_ENCODER_CNN_H_

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.30 Sekunden (vorverarbeitet am 2026-04-26) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.