// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::AndNot;
using hwy::HWY_NAMESPACE::Eq;
using hwy::HWY_NAMESPACE::GetLane;
// Returns number of non-zero coefficients (but skip LLF). // We cannot rely on block[] being all-zero bits, so first truncate to integer. // Also writes the per-8x8 block nzeros starting at nzeros_pos.
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy, const AcStrategy acs, const size_t covered_blocks, const size_t log2_covered_blocks, const int32_t* JXL_RESTRICT block, const size_t nzeros_stride,
int32_t* JXL_RESTRICT nzeros_pos) { const HWY_CAPPED(int32_t, kBlockDim) di;
constauto zero = Zero(di); // Add FF..FF for every zero coefficient, negate to get #zeros. auto neg_sum_zero = zero;
{ // Mask sufficient for one row of coefficients.
HWY_ALIGN const int32_t
llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
-1, -1, -1, -1}; // First cx=1,2,4 elements are FF..FF, others 0. const int32_t* llf_mask_pos =
llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
// Rows with LLF: mask out the LLF for (size_t y = 0; y < cy; y++) { for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) { constauto llf_mask = LoadU(di, llf_mask_pos + x);
// LLF counts as zero so we don't include it in nzeros. constauto coef =
AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
// Remaining rows: no mask for (size_t y = cy; y < cy * kBlockDim; y++) { for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) { constauto coef = Load(di, &block[y * cx * kBlockDim + x]);
neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
}
}
// We want area - sum_zero, add because neg_sum_zero is already negated. const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
GetLane(SumOfLanes(di, neg_sum_zero));
const int32_t shifted_nzeros = static_cast<int32_t>(
(nzeros + covered_blocks - 1) >> log2_covered_blocks); // Need non-canonicalized dimensions! for (size_t y = 0; y < acs.covered_blocks_y(); y++) { for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
}
}
return nzeros;
}
// Specialization for 8x8, where only top-left is LLF/DC. // About 1% overall speedup vs. NumNonZeroExceptLLF.
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
int32_t* JXL_RESTRICT nzeros_pos) { const HWY_CAPPED(int32_t, kBlockDim) di;
constauto zero = Zero(di); // Add FF..FF for every zero coefficient, negate to get #zeros. auto neg_sum_zero = zero;
{ // First row has DC, so mask const size_t y = 0;
HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
for (size_t x = 0; x < kBlockDim; x += Lanes(di)) { constauto dc_mask = Load(di, dc_mask_lanes + x);
// DC counts as zero so we don't include it in nzeros. constauto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
// Remaining rows: no mask for (size_t y = 1; y < kBlockDim; y++) { for (size_t x = 0; x < kBlockDim; x += Lanes(di)) { constauto coef = Load(di, &block[y * kBlockDim + x]);
neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
}
}
// We want 64 - sum_zero, add because neg_sum_zero is already negated. const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
GetLane(SumOfLanes(di, neg_sum_zero));
*nzeros_pos = nzeros;
return nzeros;
}
// The number of nonzeros of each block is predicted from the top and the left
// blocks, with appropriate scaling to take into account the number of blocks of
// each strategy. The predicted number of nonzeros divided by two is used as a
// context; if this number is above 63, a specific context is used. If the
// number of nonzeros of a strategy is above 63, it is written directly using a
// fixed number of bits (that depends on the size of the strategy).
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders, const Rect& rect, const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows, const AcStrategyImage& ac_strategy, const YCbCrChromaSubsampling& cs,
Image3I* JXL_RESTRICT tmp_num_nzeroes,
std::vector<Token>* JXL_RESTRICT output, const ImageB& qdc, const ImageI& qf, const BlockCtxMap& block_ctx_map) { const size_t xsize_blocks = rect.xsize(); const size_t ysize_blocks = rect.ysize();
output->clear(); // TODO(user): update the estimate: usually less coefficients are used.
output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.