/* * Copyright (c) 2019, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Work-item descriptor for one worker thread of the multi-threaded
// convolution. Fields mirror the parameters of the convolve kernels below.
typedef struct {
  const float **input;   // in_channels input planes
  int in_width;          // width of each input plane
  int in_height;         // height of each input plane
  int in_stride;         // row stride of the input planes
  const CNN_LAYER_CONFIG *layer_config;  // layer being convolved
  float **output;        // out_channels output planes
  int out_stride;        // row stride of the output planes
  int start_idx;   // first output column/channel this worker handles
                   // (column for 1x1 kernels, channel otherwise — see
                   // convolve_element_wise vs the other kernels)
  int th_step;     // stride between successive work items across threads
                   // (presumably the thread count — TODO confirm at call site)
} CONVOLVE_OPS;
// The concatenated tensor goes into dst with first the channels in // original dst followed by the channels in the src staticbool concat_tensor(const TENSOR *src, TENSOR *dst) {
assert(src->width == dst->width);
assert(src->height == dst->height);
constint dst_channels = dst->channels; constint channels = dst->channels + src->channels; constint newallocsize = channels * dst->width * dst->height; if (dst->allocsize < newallocsize) {
TENSOR t;
init_tensor(&t); // allocate new buffers and copy first the dst channels if (!realloc_tensor(&t, channels, dst->width, dst->height)) returnfalse;
copy_tensor(dst, dst->channels, 0, &t); // Swap the tensors and free the old buffers
swap_tensor(dst, &t);
free_tensor(&t);
} for (int c = 1; c < channels; ++c)
dst->buf[c] = &dst->buf[0][c * dst->width * dst->height]; // Copy the channels in src after the first dst_channels channels.
copy_tensor(src, src->channels, dst_channels, dst); returntrue;
}
// Element-wise accumulation: output[c] += add[c] for every channel plane.
// Both tensors share the same width/height/stride layout; pixels in the
// stride padding (columns >= width) are left untouched.
void av1_cnn_add_c(float **output, int channels, int width, int height,
                   int stride, const float **add) {
  for (int c = 0; c < channels; ++c) {
    float *dst = output[c];
    const float *src = add[c];
    for (int row = 0; row < height; ++row) {
      for (int col = 0; col < width; ++col) {
        dst[row * stride + col] += src[row * stride + col];
      }
    }
  }
}
// Applies the layer's activation function in place to every pixel of every
// channel plane. RELU and SOFTSIGN are supported; NONE is a no-op; SIGMOID
// is not implemented; anything else is a programming error.
void av1_cnn_activate_c(float **output, int channels, int width, int height,
                        int stride, ACTIVATION layer_activation) {
  switch (layer_activation) {
    case RELU:
      for (int c = 0; c < channels; ++c) {
        for (int row = 0; row < height; ++row) {
          for (int col = 0; col < width; ++col) {
            output[c][row * stride + col] =
                relu(output[c][row * stride + col]);
          }
        }
      }
      break;
    case SOFTSIGN:
      for (int c = 0; c < channels; ++c) {
        for (int row = 0; row < height; ++row) {
          for (int col = 0; col < width; ++col) {
            output[c][row * stride + col] =
                softsign(output[c][row * stride + col]);
          }
        }
      }
      break;
    case SIGMOID:
      assert(0 && "Sigmoid has not been supported in CNN.");  // TO DO
      break;
    case NONE:
      // Identity activation: leave the planes unchanged.
      break;
    default:
      assert(0 && "Unknown activation type");
  }
}
// Copies the current layer's active tensor into the output tensor of every
// branch b selected by layer_config->branch_config.input_to_branches (except
// the current branch itself). The copied tensor becomes the input of the
// first layer of branch b. Returns false on allocation failure.
static bool copy_active_tensor_to_branches(const TENSOR *layer_active_tensor,
                                           const CNN_LAYER_CONFIG *layer_config,
                                           int branch, TENSOR branch_output[]) {
  const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config;
  for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
    const bool feeds_branch =
        (branch_config->input_to_branches & (1 << b)) != 0;
    if (!feeds_branch || b == branch) continue;
    // Copy either the configured number of channels, or all of them when
    // channels_to_copy is unset (<= 0).
    const int copy_channels = branch_config->channels_to_copy > 0
                                  ? branch_config->channels_to_copy
                                  : layer_active_tensor->channels;
    if (!realloc_tensor(&branch_output[b], copy_channels,
                        layer_active_tensor->width,
                        layer_active_tensor->height)) {
      return false;
    }
    copy_tensor(layer_active_tensor, copy_channels, 0, &branch_output[b]);
  }
  return true;
}
// CNNConvolve specific to maxpool set as 1, either skip_width or skip_height
// greater than 1 and padding equal to PADDING_SAME_ZERO.
//
// For each output position (u, v), convolves at every pixel (hh, ww) of the
// corresponding skip_height x skip_width pooling window and max-reduces the
// results into output[i][u * out_stride + v].
static void convolve_maxpool_padding_zero(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride,
    const int cstep, const int filter_width_half,
    const int filter_height_half) {
  for (int i = 0; i < layer_config->out_channels; ++i) {
    for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) {
      for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) {
        // (hh, ww) iterate the pooling window, clipped to the image.
        for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height);
             ++hh) {
          for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width);
               ++ww) {
            float sum = layer_config->bias[i];
            for (int k = 0; k < layer_config->in_channels; ++k) {
              // Weights for in-channel k / out-channel i are interleaved in
              // the weights array with stride cstep.
              int off = k * layer_config->out_channels + i;
              for (int l = 0; l < layer_config->filter_height; ++l) {
                const int ii = hh + l - filter_height_half;
                for (int m = 0; m < layer_config->filter_width;
                     ++m, off += cstep) {
                  const int jj = ww + m - filter_width_half;
                  // Zero padding: taps outside the image contribute nothing.
                  if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width)
                    continue;
                  sum += layer_config->weights[off] *
                         input[k][ii * in_stride + jj];
                }
              }
            }
            const float a = sum;
            // First window element initializes the output; the rest max-pool
            // against the running maximum.
            if (h == hh && w == ww)
              output[i][u * out_stride + v] = a;
            else
              output[i][u * out_stride + v] =
                  AOMMAX(output[i][u * out_stride + v], a);
          }
        }
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 1, either skip_width or skip_height
// greater than 1 and padding equal to PADDING_SAME_REPLICATE.
//
// Same structure as convolve_maxpool_padding_zero, but out-of-image taps are
// clamped to the nearest edge pixel (replicate padding) via CLAMPINDEX
// instead of being skipped.
static void convolve_maxpool_padding_replicate(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride,
    const int cstep, const int filter_width_half,
    const int filter_height_half) {
  for (int i = 0; i < layer_config->out_channels; ++i) {
    for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) {
      for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) {
        // (hh, ww) iterate the pooling window, clipped to the image.
        for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height);
             ++hh) {
          for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width);
               ++ww) {
            float sum = layer_config->bias[i];
            for (int k = 0; k < layer_config->in_channels; ++k) {
              // Weights for (k, i) are interleaved with stride cstep.
              int off = k * layer_config->out_channels + i;
              for (int l = 0; l < layer_config->filter_height; ++l) {
                // Replicate padding: clamp the tap coordinate to the image.
                const int ii =
                    CLAMPINDEX(hh + l - filter_height_half, in_height);
                for (int m = 0; m < layer_config->filter_width;
                     ++m, off += cstep) {
                  const int jj =
                      CLAMPINDEX(ww + m - filter_width_half, in_width);
                  assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width);
                  sum += layer_config->weights[off] *
                         input[k][ii * in_stride + jj];
                }
              }
            }
            const float a = sum;
            // First window element initializes the output; the rest max-pool.
            if (h == hh && w == ww)
              output[i][u * out_stride + v] = a;
            else
              output[i][u * out_stride + v] =
                  AOMMAX(output[i][u * out_stride + v], a);
          }
        }
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 1, either skip_width or skip_height
// greater than 1 and padding equal to PADDING_VALID.
//
// Same structure as convolve_maxpool_padding_zero, but the outer loops stop
// filter_size - 1 short of the image edge so that the whole filter always
// lies inside the image (valid padding, no boundary handling needed).
static void convolve_maxpool_padding_valid(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride,
    const int cstep) {
  for (int i = 0; i < layer_config->out_channels; ++i) {
    for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1;
         h += layer_config->skip_height, ++u) {
      for (int w = 0, v = 0; w < in_width - layer_config->filter_width + 1;
           w += layer_config->skip_width, ++v) {
        // (hh, ww) iterate the pooling window, clipped to the image.
        for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height);
             ++hh) {
          for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width);
               ++ww) {
            float sum = layer_config->bias[i];
            for (int k = 0; k < layer_config->in_channels; ++k) {
              // Weights for (k, i) are interleaved with stride cstep.
              int off = k * layer_config->out_channels + i;
              for (int l = 0; l < layer_config->filter_height; ++l) {
                const int ii = hh + l;
                for (int m = 0; m < layer_config->filter_width;
                     ++m, off += cstep) {
                  const int jj = ww + m;
                  assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width);
                  sum += layer_config->weights[off] *
                         input[k][ii * in_stride + jj];
                }
              }
            }
            const float a = sum;
            // First window element initializes the output; the rest max-pool.
            if (h == hh && w == ww)
              output[i][u * out_stride + v] = a;
            else
              output[i][u * out_stride + v] =
                  AOMMAX(output[i][u * out_stride + v], a);
          }
        }
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 0 with filter_height and filter_width
// equal to 1.
//
// With a 1x1 filter the convolution reduces to a per-pixel dot product over
// input channels. For threading, start_idx offsets the starting input column
// (and the output column via out_h), and step makes each worker take every
// step-th output column; step <= 0 degrades to single-threaded (step 1).
static void convolve_element_wise(const float **input, int in_width,
                                  int in_height, int in_stride,
                                  const CNN_LAYER_CONFIG *const layer_config,
                                  float **output, int out_stride, int start_idx,
                                  int step) {
  const int start_h = get_start_shift_convolve(
      in_height, layer_config->filter_height, layer_config->skip_height);
  const int start_w =
      get_start_shift_convolve(in_width, layer_config->filter_width,
                               layer_config->skip_width) +
      start_idx * layer_config->skip_width;
  const int out_w_step = AOMMAX(step, 1);
  const int in_w_step = layer_config->skip_width * out_w_step;
  for (int i = 0; i < layer_config->out_channels; ++i) {
    for (int h = start_h, u = 0; h < in_height;
         h += layer_config->skip_height, ++u) {
      const int in_h = h * in_stride;
      const int out_h = u * out_stride + start_idx;
      for (int w = start_w, out_index = out_h; w < in_width;
           w += in_w_step, out_index += out_w_step) {
        float sum = layer_config->bias[i];
        for (int k = 0; k < layer_config->in_channels; ++k) {
          // 1x1 kernel: a single weight per (in-channel, out-channel) pair.
          sum += layer_config->weights[k * layer_config->out_channels + i] *
                 input[k][in_h + w];
        }
        output[i][out_index] = sum;
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 0 and padding equal to
// PADDING_SAME_ZERO.
//
// Zero padding is implemented without per-tap bounds checks: the inner loops
// only visit in-image taps, and the weight offset `off` is advanced past the
// weights whose taps fall outside the image (top_cstep / left_cstep /
// right_cstep), since those taps would contribute zero anyway. For threading,
// start_idx / channel_step partition the output channels across workers.
static void convolve_no_maxpool_padding_zero(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride,
    int start_idx, const int cstep, const int filter_width_half,
    const int filter_height_half, const int ii_shift, const int jj_shift,
    const int channel_step) {
  const int start_h = get_start_shift_convolve(
      in_height, layer_config->filter_height, layer_config->skip_height);
  const int start_w = get_start_shift_convolve(
      in_width, layer_config->filter_width, layer_config->skip_width);
  const int end_ii_shift = filter_height_half + 1;
  const int end_jj_shift = filter_width_half + 1;
  // *_filter_margin stores the number of pixels along a dimension in the
  // intersection of the complement of the image in the extended image
  // and the filter.
  const int top_filter_margin = layer_config->filter_width * ii_shift;
  const int right_filter_margin = end_jj_shift - in_width;
  for (int i = start_idx; i < layer_config->out_channels; i += channel_step) {
    for (int h = start_h, u = 0; h < in_height;
         h += layer_config->skip_height, ++u) {
      const int out_h = u * out_stride;
      // Skip the weights of the filter rows that hang above the image.
      const int top_cstep =
          AOMMAX(0, top_filter_margin - h * layer_config->filter_width) *
              cstep +
          i;
      const int start_ii = AOMMAX(0, h - ii_shift);
      const int end_ii = AOMMIN(in_height, h + end_ii_shift);
      for (int w = start_w, out_index = out_h; w < in_width;
           w += layer_config->skip_width, ++out_index) {
        // Per-row weight skips for taps hanging left/right of the image.
        const int left_cstep = AOMMAX(0, jj_shift - w) * cstep;
        const int right_cstep = AOMMAX(0, right_filter_margin + w) * cstep;
        const int start_jj = AOMMAX(0, w - jj_shift);
        const int end_jj = AOMMIN(in_width, w + end_jj_shift);
        float sum = layer_config->bias[i];
        for (int k = 0; k < layer_config->in_channels; ++k) {
          int off = k * layer_config->out_channels + top_cstep;
          for (int ii = start_ii; ii < end_ii; ++ii) {
            off += left_cstep;
            for (int jj = start_jj; jj < end_jj; ++jj, off += cstep) {
              sum += layer_config->weights[off] * input[k][ii * in_stride + jj];
            }
            off += right_cstep;
          }
        }
        output[i][out_index] = sum;
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 0 and padding equal to
// PADDING_SAME_REPLICATE.
//
// Out-of-image taps are clamped to the nearest edge pixel via CLAMPINDEX.
// For threading, start_idx / channel_step partition the output channels
// across workers.
static void convolve_no_maxpool_padding_replicate(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride,
    int start_idx, const int cstep, const int ii_shift, const int jj_shift,
    const int channel_step) {
  // h and w are shifted to an offset coordinate system to reduce in-loop
  // computation.
  const int start_h =
      get_start_shift_convolve(in_height, layer_config->filter_height,
                               layer_config->skip_height) -
      ii_shift;
  const int start_w =
      get_start_shift_convolve(in_width, layer_config->filter_width,
                               layer_config->skip_width) -
      jj_shift;
  const int end_h = in_height - ii_shift;
  const int end_w = in_width - jj_shift;
  for (int i = start_idx; i < layer_config->out_channels; i += channel_step) {
    for (int h = start_h, u = 0; h < end_h;
         h += layer_config->skip_height, ++u) {
      const int out_h = u * out_stride;
      const int upper_ii_index = layer_config->filter_height + h;
      for (int w = start_w, out_index = out_h; w < end_w;
           w += layer_config->skip_width, ++out_index) {
        const int upper_jj_index = layer_config->filter_width + w;
        float sum = layer_config->bias[i];
        for (int k = 0; k < layer_config->in_channels; ++k) {
          // Weights for (k, i) are interleaved with stride cstep.
          int off = k * layer_config->out_channels + i;
          for (int ii = h; ii < upper_ii_index; ++ii) {
            // Replicate padding: clamp the tap coordinate to the image.
            const int clamped_ii = CLAMPINDEX(ii, in_height);
            for (int jj = w; jj < upper_jj_index; ++jj) {
              const int clamped_jj = CLAMPINDEX(jj, in_width);
              assert(clamped_ii >= 0 && clamped_ii < in_height &&
                     clamped_jj >= 0 && clamped_jj < in_width);
              sum += layer_config->weights[off] *
                     input[k][clamped_ii * in_stride + clamped_jj];
              off += cstep;
            }
          }
        }
        output[i][out_index] = sum;
      }
    }
  }
}
// CNNConvolve specific to maxpool set as 0 and padding equal to
// PADDING_VALID.
//
// Valid padding: the h/w loop bounds keep the whole filter inside the image,
// so no boundary handling is needed. For threading, start_idx / channel_step
// partition the output channels across workers. This is the C reference
// implementation behind the RTCD dispatch (hence the _c suffix).
void av1_cnn_convolve_no_maxpool_padding_valid_c(
    const float **input, int in_width, int in_height, int in_stride,
    const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride,
    int start_idx, int cstep, int channel_step) {
  assert((layer_config->skip_height == 1 && layer_config->skip_width == 1) ||
         !layer_config->maxpool);
  assert(layer_config->filter_height > 1 || layer_config->filter_width > 1);
  assert(layer_config->pad == PADDING_VALID);
  for (int i = start_idx; i < layer_config->out_channels; i += channel_step) {
    for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1;
         h += layer_config->skip_height, ++u) {
      const int out_h = u * out_stride;
      const int upper_ii_index = layer_config->filter_height + h;
      for (int w = 0, out_index = out_h;
           w < in_width - layer_config->filter_width + 1;
           w += layer_config->skip_width, ++out_index) {
        const int upper_jj_index = layer_config->filter_width + w;
        float sum = layer_config->bias[i];
        for (int k = 0; k < layer_config->in_channels; ++k) {
          // Weights for (k, i) are interleaved with stride cstep.
          int off = k * layer_config->out_channels + i;
          for (int ii = h; ii < upper_ii_index; ++ii) {
            for (int jj = w; jj < upper_jj_index; ++jj) {
              assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width);
              sum += layer_config->weights[off] * input[k][ii * in_stride + jj];
              off += cstep;
            }
          }
        }
        output[i][out_index] = sum;
      }
    }
  }
}
// NOTE(review): This function body appears CORRUPTED in this copy of the
// file. The prologue belongs to av1_cnn_convolve (which upstream dispatches
// to the convolve_* kernels above), but the loop that follows normalizes
// pixels with ch_gamma/ch_mean/ch_std/ch_beta — i.e. it looks like the tail
// of a batch-norm routine spliced in, with the middle of both functions
// lost. `height`, `width`, `stride`, `image_row`, `ch_gamma`, `ch_mean`,
// `ch_std` and `ch_beta` are not declared anywhere in this function, and
// there is one extra closing brace at the end. Restore from upstream before
// use; code left byte-identical here, comments only.
static void av1_cnn_convolve(const float **input, int in_width, int in_height,
                             int in_stride,
                             const CNN_LAYER_CONFIG *layer_config,
                             float **output, int out_stride, int start_idx,
                             int step) {
  assert(!layer_config->deconvolve);
  // Interleaved-weight stride shared by the convolve kernels in this file.
  const int cstep = layer_config->in_channels * layer_config->out_channels;
  const int filter_height_half = layer_config->filter_height >> 1;
  const int filter_width_half = layer_config->filter_width >> 1;
  const int channel_step = AOMMAX(step, 1);
  for (int row = 0; row < height; row++) {
    for (int col = 0; col < width; col++) {
      image_row[col] =
          ch_gamma * (image_row[col] - ch_mean) / ch_std + ch_beta;
    }
    image_row += stride;
  }
}
}
void av1_cnn_deconvolve_c(constfloat **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride) {
assert(layer_config->deconvolve);
int out_width = 0; int out_height = 0;
av1_find_cnn_layer_output_size(in_width, in_height, layer_config, &out_width,
&out_height); switch (layer_config->pad) { case PADDING_SAME_ZERO: for (int i = 0; i < layer_config->out_channels; ++i) { for (int u = 0; u < out_height; ++u) { for (int v = 0; v < out_width; ++v) { float sum = layer_config->bias[i]; for (int k = 0; k < layer_config->in_channels; ++k) { int off = k * layer_config->out_channels + i; for (int l = 0; l < layer_config->filter_height; ++l) { constint h =
u - l +
get_start_shift_deconvolve(layer_config->filter_height,
layer_config->skip_height); for (int m = 0; m < layer_config->filter_width;
++m, off += cstep) { constint w =
v - m +
get_start_shift_deconvolve(layer_config->filter_width,
layer_config->skip_width); if ((h % layer_config->skip_height) != 0 ||
(w % layer_config->skip_width) != 0) continue; constint ii = h / layer_config->skip_height; constint jj = w / layer_config->skip_width; if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) continue;
sum += layer_config->weights[off] *
input[k][ii * in_stride + jj];
}
}
}
output[i][u * out_stride + v] = sum;
}
}
} break; case PADDING_SAME_REPLICATE: for (int i = 0; i < layer_config->out_channels; ++i) { for (int u = 0; u < out_height; ++u) { for (int v = 0; v < out_width; ++v) { float sum = layer_config->bias[i]; for (int k = 0; k < layer_config->in_channels; ++k) { int off = k * layer_config->out_channels + i; for (int l = 0; l < layer_config->filter_height; ++l) { constint h =
u - l +
get_start_shift_deconvolve(layer_config->filter_height,
layer_config->skip_height); for (int m = 0; m < layer_config->filter_width;
++m, off += cstep) { constint w =
v - m +
get_start_shift_deconvolve(layer_config->filter_width,
layer_config->skip_width); if ((h % layer_config->skip_height) != 0 ||
(w % layer_config->skip_width) != 0) continue; constint ii =
CLAMPINDEX(h / layer_config->skip_height, in_height); constint jj =
CLAMPINDEX(w / layer_config->skip_width, in_width);
assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width);
sum += layer_config->weights[off] *
input[k][ii * in_stride + jj];
}
}
}
output[i][u * out_stride + v] = sum;
}
}
} break; case PADDING_VALID: for (int i = 0; i < layer_config->out_channels; ++i) { for (int u = 0; u < out_height; ++u) { for (int v = 0; v < out_width; ++v) { float sum = layer_config->bias[i]; for (int k = 0; k < layer_config->in_channels; ++k) { int off = k * layer_config->out_channels + i; for (int l = 0; l < layer_config->filter_height; ++l) { constint h = u - l; for (int m = 0; m < layer_config->filter_width;
++m, off += cstep) { constint w = v - m; if ((h % layer_config->skip_height) != 0 ||
(w % layer_config->skip_width) != 0) continue; constint ii = h / layer_config->skip_height; constint jj = w / layer_config->skip_width; if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) continue;
sum += layer_config->weights[off] *
input[k][ii * in_stride + jj];
}
}
}
output[i][u * out_stride + v] = sum;
}
}
} break; default: assert(0 && "Unknown padding type");
}
}
// NOTE(review): FRAGMENT — the opening of the enclosing function (upstream
// this is the per-layer loop body of the CNN forward pass) is missing from
// this copy of the file. `tensor1`, `tensor2`, `branch`, `branch_config`,
// `layer_config`, `output`, `out_stride`, `output_num` and `success` are all
// declared in the lost portion. Code left byte-identical; comments only.
int i_width = in_width;
int i_height = in_height;
int o_width = 0, o_height = 0;
// Initialize per-branch input (tensor1) and output (tensor2) scratch tensors.
for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
  init_tensor(&tensor1[b]);
  init_tensor(&tensor2[b]);
}
// If we are combining branches make sure that the branch to combine
// is different from the current branch.
assert(IMPLIES(layer_config->branch_combine_type != BRANCH_NOC,
               !(branch_config->branches_to_combine & (1 << branch))));
// BRANCH_INPUT copy point: feed this layer's input tensor to the branches
// selected in the branch config before the layer runs.
if (layer_config->branch_copy_type == BRANCH_INPUT) {
  if (!copy_active_tensor_to_branches(&tensor1[branch], layer_config, branch,
                                      tensor2)) {
    goto Error;
  }
}
// Check consistency of input and output channels
assert(tensor1[branch].channels == layer_config->in_channels);
assert(tensor2[branch].channels == layer_config->out_channels);
// Concatenate tensors
if (layer_config->branch_combine_type == BRANCH_CAT) {
  if (output_num == -1) {  // Non-output layer
    for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
      if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
        assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
        assert(tensor2[b].channels > 0);
        if (!concat_tensor(&tensor2[b], &tensor2[branch])) goto Error;
      }
    }
  } else {  // Output layer
    // First pass: count the total channels after concatenation.
    const int existing_channels = tensor2[branch].channels;
    int num_chs = existing_channels;
    for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
      if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
        assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
        // Needed only to assign the new channel buffers
        num_chs += tensor2[b].channels;
      }
    }
    // Point the combined tensor directly at the caller's output buffers.
    assign_tensor(&tensor2[branch], output[output_num], num_chs, o_width,
                  o_height, out_stride[output_num]);
    // Second pass: copy each combined branch after the existing channels.
    num_chs = existing_channels;
    for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
      if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
        assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
        // Needed only to assign the new channel buffers
        copy_tensor(&tensor2[b], tensor2[b].channels, num_chs,
                    &tensor2[branch]);
        num_chs += tensor2[b].channels;
      }
    }
  }
}
// BRANCH_COMBINED copy point: feed the combined output to selected branches.
if (layer_config->branch_copy_type == BRANCH_COMBINED) {
  if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config, branch,
                                      tensor2)) {
    goto Error;
  }
}
}
success = true;
Error:
// Free per-branch scratch tensors on both the success and failure paths.
for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
  free_tensor(&tensor1[b]);
  free_tensor(&tensor2[b]);
}
return success;
}
// Assume output already has proper allocation // Assume input image buffers all have same resolution and strides bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, int stride, const CNN_CONFIG *cnn_config, const CNN_THREAD_DATA *thread_data,
CNN_MULTI_OUT *output) { constfloat max_val = 255.0;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.