/* * Copyright (c) 2020, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Calculate prediction based on the given input features and neural net config. // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden // layer. void av1_nn_predict_neon(constfloat *input_nodes, const NN_CONFIG *const nn_config, int reduce_prec, float *const output) { float buf[2][NN_MAX_NODES_PER_LAYER]; int buf_index = 0; int num_inputs = nn_config->num_inputs; // Hidden layers, except the final iteration is the output layer. for (int layer = 0; layer <= nn_config->num_hidden_layers; layer++) { constfloat *layer_weights = nn_config->weights[layer]; constfloat *layer_bias = nn_config->bias[layer]; bool output_layer = (layer == nn_config->num_hidden_layers); float *const output_nodes = output_layer ? output : buf[buf_index]; constint num_outputs = output_layer ? nn_config->num_outputs
: nn_config->num_hidden_nodes[layer];
if (num_inputs % 4 == 0 && num_outputs % 8 == 0) { for (int out = 0; out < num_outputs; out += 8) {
nn_propagate_4to8(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out], output_layer);
}
} elseif (num_inputs % 8 == 0 && num_outputs % 4 == 0) { for (int out = 0; out < num_outputs; out += 4) {
nn_propagate_8to4(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out], output_layer);
}
} elseif (num_inputs % 4 == 0 && num_outputs % 4 == 0) { for (int out = 0; out < num_outputs; out += 4) {
nn_propagate_4to4(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out], output_layer);
}
} elseif (num_inputs % 8 == 0) { for (int out = 0; out < num_outputs; out++) {
nn_propagate_8to1(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out], output_layer);
}
} elseif (num_inputs % 4 == 0) { for (int out = 0; out < num_outputs; out++) {
nn_propagate_4to1(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out], output_layer);
}
} elseif (num_inputs > 8) { for (int out = 0; out < num_outputs; out++) {
nn_propagate_xto1(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out]);
}
} elseif (num_inputs >= 4) { for (int out = 0; out < num_outputs; out++) {
nn_propagate_xsto1(num_inputs, input_nodes,
&layer_weights[out * num_inputs], &layer_bias[out],
&output_nodes[out]);
}
} else { for (int node = 0; node < num_outputs; ++node) { float val = layer_bias[node]; for (int i = 0; i < num_inputs; ++i)
val += layer_weights[node * num_inputs + i] * input_nodes[i]; // ReLU as activation function.
val = val > 0.0f ? val : 0.0f; // Could use AOMMAX().
output_nodes[node] = val;
}
}
input_nodes = output_nodes;
num_inputs = num_outputs;
buf_index = 1 - buf_index;
} if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_outputs);
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.12 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.