/* * Copyright (c) 2016, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Function pointer types under test. Each aliases one flavor of the
// MSE/variance kernels exercised by this file; tests compare a SIMD
// candidate against the matching C reference implementation.
typedef uint64_t (*MseWxH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
                                    int sstride, int w, int h);
typedef uint64_t (*Mse16xH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
                                     int w, int h);
typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
                                        const uint8_t *b, int b_stride,
                                        unsigned int *sse);
typedef void (*GetSseSum8x8QuadFunc)(const uint8_t *a, int a_stride,
                                     const uint8_t *b, int b_stride,
                                     uint32_t *sse8x8, int *sum8x8,
                                     unsigned int *tot_sse, int *tot_sum,
                                     uint32_t *var8x8);
typedef void (*GetSseSum16x16DualFunc)(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride,
                                       uint32_t *sse16x16,
                                       unsigned int *tot_sse, int *tot_sum,
                                       uint32_t *var16x16);
typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
                                         int xoffset, int yoffset,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
                                            int xoffset, int yoffset,
                                            const uint8_t *b, int b_stride,
                                            uint32_t *sse,
                                            const uint8_t *second_pred);
typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
#if !CONFIG_REALTIME_ONLY
typedef uint32_t (*ObmcSubpelVarFunc)(const uint8_t *pre, int pre_stride,
                                      int xoffset, int yoffset,
                                      const int32_t *wsrc, const int32_t *mask,
                                      unsigned int *sse);
#endif  // !CONFIG_REALTIME_ONLY
using libaom_test::ACMRandom;
// Truncate high bit depth results by downshifting (with rounding) by: // 2 * (bit_depth - 8) for sse // (bit_depth - 8) for se staticvoid RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { switch (bit_depth) { case AOM_BITS_12:
*sse = (*sse + 128) >> 8;
*se = (*se + 8) >> 4; break; case AOM_BITS_10:
*sse = (*sse + 8) >> 4;
*se = (*se + 2) >> 2; break; case AOM_BITS_8: default: break;
}
}
/* Note:
 * Our codebase calculates the "diff" value in the variance algorithm by
 * (src - ref).
 */
// Scalar reference implementation of variance for a (1 << l2w) x (1 << l2h)
// block. Writes the (bit-depth-rounded) SSE through sse_ptr and returns the
// variance: sse - se^2 / (w * h), with the division done as a shift since
// w * h is a power of two.
static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
                             int l2h, int src_stride, int ref_stride,
                             uint32_t *sse_ptr, bool use_high_bit_depth_,
                             aom_bit_depth_t bit_depth) {
  int64_t se = 0;
  uint64_t sse = 0;
  const int w = 1 << l2w;
  const int h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      int diff;
      if (!use_high_bit_depth_) {
        diff = src[y * src_stride + x] - ref[y * ref_stride + x];
        se += diff;
        sse += diff * diff;
      } else {
        // High bit depth buffers hold 16-bit samples behind the 8-bit
        // pointer; CONVERT_TO_SHORTPTR recovers the uint16_t view.
        diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
               CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
        se += diff;
        sse += diff * diff;
      }
    }
  }
  RoundHighBitDepth(bit_depth, &se, &sse);
  *sse_ptr = static_cast<uint32_t>(sse);
  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}
/* The subpel reference functions differ from the codec version in one aspect: * they calculate the bilinear factors directly instead of using a lookup table * and therefore upshift xoff and yoff by 1. Only every other calculated value * is used so the codec version shrinks the table to save space.
*/ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, int l2w, int l2h, int xoff, int yoff,
uint32_t *sse_ptr, bool use_high_bit_depth_,
aom_bit_depth_t bit_depth) {
int64_t se = 0;
uint64_t sse = 0; constint w = 1 << l2w; constint h = 1 << l2h;
xoff <<= 1;
yoff <<= 1;
for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // Bilinear interpolation at a 16th pel step. if (!use_high_bit_depth_) { constint a1 = ref[(w + 1) * (y + 0) + x + 0]; constint a2 = ref[(w + 1) * (y + 0) + x + 1]; constint b1 = ref[(w + 1) * (y + 1) + x + 0]; constint b2 = ref[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff = r - src[w * y + x];
se += diff;
sse += diff * diff;
} else {
uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
uint16_t *src16 = CONVERT_TO_SHORTPTR(src); constint a1 = ref16[(w + 1) * (y + 0) + x + 0]; constint a2 = ref16[(w + 1) * (y + 0) + x + 1]; constint b1 = ref16[(w + 1) * (y + 1) + x + 0]; constint b2 = ref16[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff = r - src16[w * y + x];
se += diff;
sse += diff * diff;
}
}
}
RoundHighBitDepth(bit_depth, &se, &sse);
*sse_ptr = static_cast<uint32_t>(sse); returnstatic_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}
static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w, int l2h, int xoff, int yoff,
uint32_t *sse_ptr, bool use_high_bit_depth,
aom_bit_depth_t bit_depth) {
int64_t se = 0;
uint64_t sse = 0; constint w = 1 << l2w; constint h = 1 << l2h;
xoff <<= 1;
yoff <<= 1;
for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // bilinear interpolation at a 16th pel step if (!use_high_bit_depth) { constint a1 = ref[(w + 1) * (y + 0) + x + 0]; constint a2 = ref[(w + 1) * (y + 0) + x + 1]; constint b1 = ref[(w + 1) * (y + 1) + x + 0]; constint b2 = ref[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff =
((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
se += diff;
sse += diff * diff;
} else { const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred); constint a1 = ref16[(w + 1) * (y + 0) + x + 0]; constint a2 = ref16[(w + 1) * (y + 0) + x + 1]; constint b1 = ref16[(w + 1) * (y + 1) + x + 0]; constint b2 = ref16[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
se += diff;
sse += diff * diff;
}
}
}
RoundHighBitDepth(bit_depth, &se, &sse);
*sse_ptr = static_cast<uint32_t>(sse); returnstatic_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}
#if !CONFIG_REALTIME_ONLY static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h, int xoff, int yoff, const int32_t *wsrc, const int32_t *mask, uint32_t *sse_ptr, bool use_high_bit_depth_,
aom_bit_depth_t bit_depth) {
int64_t se = 0;
uint64_t sse = 0; constint w = 1 << l2w; constint h = 1 << l2h;
xoff <<= 1;
yoff <<= 1;
for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // Bilinear interpolation at a 16th pel step. if (!use_high_bit_depth_) { constint a1 = pre[(w + 1) * (y + 0) + x + 0]; constint a2 = pre[(w + 1) * (y + 0) + x + 1]; constint b1 = pre[(w + 1) * (y + 1) + x + 0]; constint b2 = pre[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff = ROUND_POWER_OF_TWO_SIGNED(
wsrc[w * y + x] - r * mask[w * y + x], 12);
se += diff;
sse += diff * diff;
} else {
uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre); constint a1 = pre16[(w + 1) * (y + 0) + x + 0]; constint a2 = pre16[(w + 1) * (y + 0) + x + 1]; constint b1 = pre16[(w + 1) * (y + 1) + x + 0]; constint b2 = pre16[(w + 1) * (y + 1) + x + 1]; constint a = a1 + (((a2 - a1) * xoff + 8) >> 4); constint b = b1 + (((b2 - b1) * xoff + 8) >> 4); constint r = a + (((b - a) * yoff + 8) >> 4); constint diff = ROUND_POWER_OF_TWO_SIGNED(
wsrc[w * y + x] - r * mask[w * y + x], 12);
se += diff;
sse += diff * diff;
}
}
}
RoundHighBitDepth(bit_depth, &se, &sse);
*sse_ptr = static_cast<uint32_t>(sse); returnstatic_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
} #endif
//////////////////////////////////////////////////////////////////////////////// // Encapsulating struct to store the function to test along with // some testing context. // Can be used for MSE, SSE, Variance, etc.
// NOTE(review): this class definition appears truncated/garbled by text
// extraction. SetUp() is missing its body tail (the closing brace plus,
// presumably, RNG seeding and buffer allocation — TODO confirm against
// upstream), and member fields / TearDown() are absent. Code is left
// byte-identical; restore from the upstream source before compiling.
// Main class for testing a function type template <typename FunctionType> class MseWxHTestClass
: public ::testing::TestWithParam<TestParams<FunctionType> > { public: void SetUp() override {
params_ = this->GetParam();
// Accessors relaying the parameterized test's geometry; note both strides
// intentionally equal the width (packed buffers).
// some relay helpers int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } int d_stride() const { return params_.width; } // stride is same as width int s_stride() const { return params_.width; } // stride is same as width
};
// NOTE(review): this method is truncated by extraction — the tail that
// would report elapsed_time_c vs elapsed_time_simd and the closing brace
// are missing before the next template definition. Code left byte-identical.
// Times `run_time` iterations of the C reference against the function under
// test on identical random 8-bit data.
template <typename MseWxHFunctionType> void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
aom_usec_timer ref_timer, test_timer; double elapsed_time_c = 0; double elapsed_time_simd = 0; int run_time = 10000000; int w = width(); int h = height(); int dstride = d_stride(); int sstride = s_stride();
for (int k = 0; k < block_size(); ++k) {
dst_[k] = rnd_.Rand8();
src_[k] = rnd_.Rand8();
}
// Time the C reference implementation.
aom_usec_timer_start(&ref_timer); for (int i = 0; i < run_time; i++) {
aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
}
aom_usec_timer_mark(&ref_timer);
elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
// Time the (typically SIMD) implementation under test.
aom_usec_timer_start(&test_timer); for (int i = 0; i < run_time; i++) {
params_.func(dst_, dstride, src_, sstride, w, h);
}
aom_usec_timer_mark(&test_timer);
elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
// Checks that the implementation under test (params_.func) produces
// bit-identical MSE to the C reference aom_mse_wxh_16bit_c across several
// rounds of fresh random 8-bit input.
template <typename MseWxHFunctionType>
void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
  uint64_t mse_ref = 0;
  uint64_t mse_mod = 0;
  int w = width();
  int h = height();
  int dstride = d_stride();
  int sstride = s_stride();
  // Repeat with new random data each round to cover more input patterns.
  for (int i = 0; i < 10; i++) {
    for (int k = 0; k < block_size(); ++k) {
      dst_[k] = rnd_.Rand8();
      src_[k] = rnd_.Rand8();
    }
    API_REGISTER_STATE_CHECK(
        mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
    API_REGISTER_STATE_CHECK(
        mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
    EXPECT_EQ(mse_ref, mse_mod)
        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
  }
}
// NOTE(review): the Mse16xHTestClass definition below is garbled by text
// extraction — after SetUp() the class body is interleaved with fragments of
// what appear to be its speed-test and match-test methods (the class's
// closing "};", TearDown(), and member declarations are missing, and
// RefMatchExtremeTestMse() at the end is cut off mid-body). Code is left
// byte-identical; restore from the upstream source before compiling.
template <typename FunctionType> class Mse16xHTestClass
: public ::testing::TestWithParam<TestParams<FunctionType> > { public: // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for // maximum width 16 and maximum height 8. int mem_size = 16 * 8; void SetUp() override {
params_ = this->GetParam();
rnd_.Reset(ACMRandom::DeterministicSeed());
src_ = reinterpret_cast<uint16_t *>(
aom_memalign(16, mem_size * sizeof(*src_)));
dst_ = reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
ASSERT_NE(src_, nullptr);
ASSERT_NE(dst_, nullptr);
}
// Fragment (presumably from a speed-test method — TODO confirm): fill the
// buffers, mapping out-of-range 16-bit values to CDEF_VERY_LARGE.
for (int k = 0; k < mem_size; ++k) {
dst_[k] = rnd_.Rand8(); // Right shift by 6 is done to generate more input in range of [0,255] than // CDEF_VERY_LARGE int rnd_i10 = rnd_.Rand16() >> 6;
src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
}
// Time the C reference, then the implementation under test.
aom_usec_timer_start(&ref_timer); for (int i = 0; i < loop_count; i++) {
aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
}
aom_usec_timer_mark(&ref_timer);
elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
aom_usec_timer_start(&test_timer); for (int i = 0; i < loop_count; i++) {
params_.func(dst_, dstride, src_, w, h);
}
aom_usec_timer_mark(&test_timer);
elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
// Fragment (presumably from a ref-match test — TODO confirm).
for (int i = 0; i < 10; i++) { for (int k = 0; k < mem_size; ++k) {
dst_[k] = rnd_.Rand8(); // Right shift by 6 is done to generate more input in range of [0,255] // than CDEF_VERY_LARGE int rnd_i10 = rnd_.Rand16() >> 6;
src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
}
// Extreme-value variant: buffers are filled with only min/max legal values.
// Truncated mid-body by extraction.
template <typename Mse16xHFunctionType> void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
uint64_t mse_ref = 0;
uint64_t mse_mod = 0; constint w = width(); constint h = height(); constint dstride = d_stride(); constint iter = 10;
// Fill the buffers with extreme values for (int i = 0; i < iter; i++) { for (int k = 0; k < mem_size; ++k) {
dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
}
// NOTE(review): the MainTestClass definition below is truncated/garbled by
// text extraction — SetUp() has no body tail, members are missing, and the
// class body jumps into what looks like a sub-pixel variance speed-test
// fragment (timing params_.func against subpel_variance_ref). Code is left
// byte-identical; restore from the upstream source before compiling.
// Main class for testing a function type template <typename FunctionType> class MainTestClass
: public ::testing::TestWithParam<TestParams<FunctionType> > { public: void SetUp() override {
params_ = this->GetParam();
protected: // We could sub-class MainTestClass into dedicated class for Variance // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing // to access top class fields xxx. That's cumbersome, so for now we'll just // implement the testing methods here:
// Fragment: time the function under test with random sub-pel offsets.
aom_usec_timer_start(&timer); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8);
params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
}
aom_usec_timer_mark(&timer);
// Fragment: time the scalar reference for comparison.
aom_usec_timer_start(&timer_c); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8);
subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
&sse2, use_high_bit_depth(), params_.bit_depth);
}
aom_usec_timer_mark(&timer_c);
// NOTE(review): this specialization is garbled by text extraction — after
// the extreme-value buffer setup it jumps into a timing fragment (timer /
// run_time / rnd_(8)) that presumably belongs to a separate speed test, and
// no closing brace for ExtremeRefTest() is in view. Code left byte-identical.
template <> void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() { // Pre: Set the first half of values to the maximum, the second half to 0. // Mask: same as above // WSrc: Set the first half of values to 0, the second half to the maximum. for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { constint half = block_size() / 2; if (!use_high_bit_depth()) {
memset(pre_, 255, half);
memset(pre_ + half, 0, half + width() + height() + 1);
} else {
aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
half + width() + height() + 1);
} for (int j = 0; j < half; j++) {
wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
mask_[j] = 0;
} for (int j = half; j < block_size(); j++) {
wsrc_[j] = 0;
mask_[j] = kMaskMax * kMaskMax;
}
// Fragment (presumably from a speed test — TODO confirm): time the
// function under test with random sub-pel offsets under the state check.
aom_usec_timer_start(&timer); for (int i = 0; i < run_time; ++i) { int x = rnd_(8); int y = rnd_(8);
API_REGISTER_STATE_CHECK(
params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
}
aom_usec_timer_mark(&timer);
/* NOTE(review): the German boilerplate below is website-disclaimer text
 * ("information compiled to the best of our knowledge; no guarantee of
 * completeness, correctness, or quality; syntax coloring and measurement
 * are still experimental") that was evidently appended to this file during
 * extraction from a web page. It is not part of the test source; it is
 * preserved verbatim but commented out so the file remains well-formed.
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */