// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
auto bcm = *block_ctx_map;
bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
bcm.num_ctxs = 2;
bcm.num_dc_ctxs = 1; return;
} if (cparams.speed_tier >= SpeedTier::kFalcon) { return;
} // No need to change context modeling for small images.
size_t tot = rqf.xsize() * rqf.ysize();
size_t size_for_ctx_model = (1 << 10) * cparams.butteraugli_distance; if (tot < size_for_ctx_model) return;
struct OccCounters { // count the occurrences of each qf value and each strategy type.
OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) { for (size_t y = 0; y < rqf.ysize(); y++) { const int32_t* qf_row = rqf.Row(y);
AcStrategyRow acs_row = ac_strategy.ConstRow(y); for (size_t x = 0; x < rqf.xsize(); x++) { int ord = kStrategyOrder[acs_row[x].RawStrategy()]; int qf = qf_row[x] - 1;
qf_counts[qf]++;
qf_ord_counts[ord][qf]++;
ord_counts[ord]++;
}
}
}
size_t qf_counts[256] = {};
size_t qf_ord_counts[kNumOrders][256] = {};
size_t ord_counts[kNumOrders] = {};
}; // The OccCounters struct is too big to allocate on the stack.
std::unique_ptr<OccCounters> counters(new OccCounters(rqf, ac_strategy));
// Splitting the context model according to the quantization field seems to // mostly benefit only large images.
size_t size_for_qf_split = (1 << 13) * cparams.butteraugli_distance;
size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
std::vector<uint32_t>& qft = block_ctx_map->qf_thresholds;
qft.clear(); // Divide the quant field in up to num_qf_segments segments.
size_t cumsum = 0;
size_t next = 1;
size_t last_cut = 256;
size_t cut = tot * next / num_qf_segments; for (uint32_t j = 0; j < 256; j++) {
cumsum += counters->qf_counts[j]; if (cumsum > cut) { if (j != 0) {
qft.push_back(j);
}
last_cut = j; while (cumsum > cut) {
next++;
cut = tot * next / num_qf_segments;
}
} elseif (next > qft.size() + 1) { if (j - 1 == last_cut && j != 0) {
qft.push_back(j);
}
}
}
// Count the occurrences of each segment.
std::vector<size_t> counts(kNumOrders * (qft.size() + 1));
size_t qft_pos = 0; for (size_t j = 0; j < 256; j++) { if (qft_pos < qft.size() && j == qft[qft_pos]) {
qft_pos++;
} for (size_t i = 0; i < kNumOrders; i++) {
counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j];
}
}
// Repeatedly merge the lowest-count pair.
std::vector<uint8_t> remap((qft.size() + 1) * kNumOrders);
std::iota(remap.begin(), remap.end(), 0);
std::vector<uint8_t> clusters(remap);
size_t nb_clusters =
Clamp1(static_cast<int>(tot / size_for_ctx_model / 2), 2, 9);
size_t nb_clusters_chroma =
Clamp1(static_cast<int>(tot / size_for_ctx_model / 3), 1, 5); // This is O(n^2 log n), but n is small. while (clusters.size() > nb_clusters) {
std::sort(clusters.begin(), clusters.end(),
[&](int a, int b) { return counts[a] > counts[b]; });
counts[clusters[clusters.size() - 2]] += counts[clusters.back()];
counts[clusters.back()] = 0;
remap[clusters.back()] = clusters[clusters.size() - 2];
clusters.pop_back();
} for (size_t i = 0; i < remap.size(); i++) { while (remap[remap[i]] != remap[i]) {
remap[i] = remap[remap[i]];
}
} // Relabel starting from 0.
std::vector<uint8_t> remap_remap(remap.size(), remap.size());
size_t num = 0; for (size_t i = 0; i < remap.size(); i++) { if (remap_remap[remap[i]] == remap.size()) {
remap_remap[remap[i]] = num++;
}
remap[i] = remap_remap[remap[i]];
} // Write the block context map. auto& ctx_map = block_ctx_map->ctx_map;
ctx_map = remap;
ctx_map.resize(remap.size() * 3); // for chroma, only use up to nb_clusters_chroma separate block contexts // (those for the biggest clusters) for (size_t i = remap.size(); i < remap.size() * 3; i++) {
ctx_map[i] = num + Clamp1(static_cast<int>(remap[i % remap.size()]), 0, static_cast<int>(nb_clusters_chroma) - 1);
}
block_ctx_map->num_ctxs =
*std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
}
// Maintains the two smallest values seen so far. If `v` is smaller than the
// current second-minimum `min2`, it is inserted into the ordering so that
// the invariant min1 <= min2 is preserved.
// (Fixed fused token `constfloat` -> `const float`.)
void StoreMin2(const float v, float& min1, float& min2) {
  if (v < min2) {
    if (v < min1) {
      // New overall minimum: previous minimum becomes the second-smallest.
      min2 = min1;
      min1 = v;
    } else {
      // Between min1 and min2: replaces only the second-smallest.
      min2 = v;
    }
  }
}
void CreateMask(const ImageF& image, ImageF& mask) { for (size_t y = 0; y < image.ysize(); y++) { constauto* row_n = y > 0 ? image.Row(y - 1) : image.Row(y); constauto* row_in = image.Row(y); constauto* row_s = y + 1 < image.ysize() ? image.Row(y + 1) : image.Row(y); auto* row_out = mask.Row(y); for (size_t x = 0; x < image.xsize(); x++) { // Center, west, east, north, south values and their absolute difference float c = row_in[x]; float w = x > 0 ? row_in[x - 1] : row_in[x]; float e = x + 1 < image.xsize() ? row_in[x + 1] : row_in[x]; float n = row_n[x]; float s = row_s[x]; float dw = std::abs(c - w); float de = std::abs(c - e); float dn = std::abs(c - n); float ds = std::abs(c - s); float min = std::numeric_limits<float>::max(); float min2 = std::numeric_limits<float>::max();
StoreMin2(dw, min, min2);
StoreMin2(de, min, min2);
StoreMin2(dn, min, min2);
StoreMin2(ds, min, min2);
row_out[x] = min2;
}
}
}
// Downsamples the image by a factor of 2 with a kernel that's sharper than
// the standard 2x2 box kernel used by DownsampleImage. The kernel is
// optimized against the result of the 2x2 upsampling kernel used by the
// decoder. Ringing is slightly reduced by clamping the values of the
// resulting pixels within certain bounds of a small region in the original
// image.
// NOTE(review): `mask`, `kernel`, `xsize` and `ysize` are used below but are
// not declared in the visible part of this function; their definitions (mask
// allocation via CreateMask, the 12x12 kernel table, and the input
// dimensions) appear to have been elided from this excerpt -- verify against
// the full source. (Fixed fused tokens `constfloat`/`elseif`/`returntrue`.)
Status DownsampleImage2_Sharper(const ImageF& input, ImageF* output) {
  const int64_t kernelx = 12;
  const int64_t kernely = 12;
  JxlMemoryManager* memory_manager = input.memory_manager();
  for (size_t y = 0; y < output->ysize(); y++) {
    float* row_out = output->Row(y);
    const float* row_in[kernely];
    const float* row_mask = mask.Row(y);
    // get the rows in the support
    for (size_t ky = 0; ky < kernely; ky++) {
      int64_t iy = y * 2 + ky - (kernely - 1) / 2;
      if (iy < 0) iy = 0;
      if (iy >= ysize) iy = ysize - 1;
      row_in[ky] = input.Row(iy);
    }
    for (size_t x = 0; x < output->xsize(); x++) {
      // get min and max values of the original image in the support
      float min = std::numeric_limits<float>::max();
      // lowest(), not min(): numeric_limits<float>::min() is the smallest
      // positive normal value, which is wrong as a running-max seed when
      // samples can be negative (e.g. the opsin X channel).
      float max = std::numeric_limits<float>::lowest();
      // kernelx - R and kernely - R are the radius of a rectangular region in
      // which the values of a pixel are bounded to reduce ringing.
      static constexpr int64_t R = 5;
      for (int64_t ky = R; ky + R < kernely; ky++) {
        for (int64_t kx = R; kx + R < kernelx; kx++) {
          int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
          if (ix < 0) ix = 0;
          if (ix >= xsize) ix = xsize - 1;
          min = std::min<float>(min, row_in[ky][ix]);
          max = std::max<float>(max, row_in[ky][ix]);
        }
      }
      // Weighted sum over the full 12x12 support, with edge clamping.
      float sum = 0;
      for (int64_t ky = 0; ky < kernely; ky++) {
        for (int64_t kx = 0; kx < kernelx; kx++) {
          int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
          if (ix < 0) ix = 0;
          if (ix >= xsize) ix = xsize - 1;
          sum += row_in[ky][ix] * kernel[ky * kernelx + kx];
        }
      }
      row_out[x] = sum;
      // Clamp the pixel within the value of a small area to prevent ringing.
      // The mask determines how much to clamp, clamp more to reduce more
      // ringing in smooth areas, clamp less in noisy areas to get more
      // sharpness. Higher mask_multiplier gives less clamping, so less
      // ringing reduction.
      constexpr float mask_multiplier = 1;
      float a = row_mask[x] * mask_multiplier;
      float clip_min = min - a;
      float clip_max = max + a;
      if (row_out[x] < clip_min) {
        row_out[x] = clip_min;
      } else if (row_out[x] > clip_max) {
        row_out[x] = clip_max;
      }
    }
  }
  return true;
}
} // namespace
// Downsamples all three planes of `opsin` in place by a factor of 2 using
// the sharper 12x12 kernel. The destination is over-allocated by kBlockDim
// in each dimension and then shrunk, so later padding does not reallocate.
// (Fixed fused token `returntrue;` -> `return true;`.)
Status DownsampleImage2_Sharper(Image3F* opsin) {
  // Allocate extra space to avoid a reallocation when padding.
  JxlMemoryManager* memory_manager = opsin->memory_manager();
  JXL_ASSIGN_OR_RETURN(
      Image3F downsampled,
      Image3F::Create(memory_manager, DivCeil(opsin->xsize(), 2) + kBlockDim,
                      DivCeil(opsin->ysize(), 2) + kBlockDim));
  JXL_RETURN_IF_ERROR(downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
                                           downsampled.ysize() - kBlockDim));
  for (size_t c = 0; c < 3; c++) {
    JXL_RETURN_IF_ERROR(
        DownsampleImage2_Sharper(opsin->Plane(c), &downsampled.Plane(c)));
  }
  *opsin = std::move(downsampled);
  return true;
}
namespace {
// The default upsampling kernels used by Upsampler in the decoder.
// Side length of the 5x5 upsampling kernels.
// (`constexpr` already implies `const`; dropped the redundant qualifier.)
constexpr int64_t kSize = 5;
// Does exactly the same as the Upsampler in dec_upsampler for 2x2 pixels, with // default CustomTransformData. // TODO(lode): use Upsampler instead. However, it requires pre-initialization // and padding on the left side of the image which requires refactoring the // other code using this. void UpsampleImage(const ImageF& input, ImageF* output) {
int64_t xsize = input.xsize();
int64_t ysize = input.ysize();
int64_t xsize2 = output->xsize();
int64_t ysize2 = output->ysize(); for (int64_t y = 0; y < ysize2; y++) { for (int64_t x = 0; x < xsize2; x++) { constauto* kernel = kernel00; if ((x & 1) && (y & 1)) {
kernel = kernel11;
} elseif (x & 1) {
kernel = kernel10;
} elseif (y & 1) {
kernel = kernel01;
} float sum = 0;
int64_t x2 = x / 2;
int64_t y2 = y / 2;
// get min and max values of the original image in the support float min = std::numeric_limits<float>::max(); float max = std::numeric_limits<float>::min();
for (int64_t ky = 0; ky < kSize; ky++) { for (int64_t kx = 0; kx < kSize; kx++) {
int64_t xi = x2 - kSize / 2 + kx;
int64_t yi = y2 - kSize / 2 + ky; if (xi < 0) xi = 0; if (xi >= xsize) xi = input.xsize() - 1; if (yi < 0) yi = 0; if (yi >= ysize) yi = input.ysize() - 1;
min = std::min<float>(min, input.Row(yi)[xi]);
max = std::max<float>(max, input.Row(yi)[xi]);
}
}
for (int64_t ky = 0; ky < kSize; ky++) { for (int64_t kx = 0; kx < kSize; kx++) {
int64_t xi = x2 - kSize / 2 + kx;
int64_t yi = y2 - kSize / 2 + ky; if (xi < 0) xi = 0; if (xi >= xsize) xi = input.xsize() - 1; if (yi < 0) yi = 0; if (yi >= ysize) yi = input.ysize() - 1;
sum += input.Row(yi)[xi] * kernel[ky * kSize + kx];
}
}
output->Row(y)[x] = sum; if (output->Row(y)[x] < min) output->Row(y)[x] = min; if (output->Row(y)[x] > max) output->Row(y)[x] = max;
}
}
}
// Returns the derivative of Upsampler, with respect to input pixel x2, y2, to // output pixel x, y (ignoring the clamping). float UpsamplerDeriv(int64_t x2, int64_t y2, int64_t x, int64_t y) { constauto* kernel = kernel00; if ((x & 1) && (y & 1)) {
kernel = kernel11;
} elseif (x & 1) {
kernel = kernel10;
} elseif (y & 1) {
kernel = kernel01;
}
int64_t ix = x / 2;
int64_t iy = y / 2;
int64_t kx = x2 - ix + kSize / 2;
int64_t ky = y2 - iy + kSize / 2;
// This should not happen. if (kx < 0 || kx >= kSize || ky < 0 || ky >= kSize) return 0;
return kernel[ky * kSize + kx];
}
// Apply the derivative of the Upsampler to the input, reversing the effect of // its coefficients. The output image is 2x2 times smaller than the input. void AntiUpsample(const ImageF& input, ImageF* d) {
int64_t xsize = input.xsize();
int64_t ysize = input.ysize();
int64_t xsize2 = d->xsize();
int64_t ysize2 = d->ysize();
int64_t k0 = kSize - 1;
int64_t k1 = kSize; for (int64_t y2 = 0; y2 < ysize2; ++y2) { auto* row = d->Row(y2); for (int64_t x2 = 0; x2 < xsize2; ++x2) {
int64_t x0 = x2 * 2 - k0; if (x0 < 0) x0 = 0;
int64_t x1 = x2 * 2 + k1 + 1; if (x1 > xsize) x1 = xsize;
int64_t y0 = y2 * 2 - k0; if (y0 < 0) y0 = 0;
int64_t y1 = y2 * 2 + k1 + 1; if (y1 > ysize) y1 = ysize;
float sum = 0; for (int64_t y = y0; y < y1; ++y) { constauto* row_in = input.Row(y); for (int64_t x = x0; x < x1; ++x) { double deriv = UpsamplerDeriv(x2, y2, x, y);
sum += deriv * row_in[x];
}
}
row[x2] = sum;
}
}
}
for (size_t y = 0; y < down.ysize(); y++) { constfloat* row_mask = mask.Row(y); float* row_out = down.Row(y); for (size_t x = 0; x < down.xsize(); x++) { float v = down.Row(y)[x]; float min = initial.Row(y)[x]; float max = initial.Row(y)[x]; for (int64_t yi = -1; yi < 2; yi++) { for (int64_t xi = -1; xi < 2; xi++) {
int64_t x2 = static_cast<int64_t>(x) + xi;
int64_t y2 = static_cast<int64_t>(y) + yi; if (x2 < 0 || y2 < 0 || x2 >= xsize2 || y2 >= ysize2) continue;
min = std::min<float>(min, initial.Row(y2)[x2]);
max = std::max<float>(max, initial.Row(y2)[x2]);
}
}
row_out[x] = v;
// Clamp the pixel within the value of a small area to prevent ringning. // The mask determines how much to clamp, clamp more to reduce more // ringing in smooth areas, clamp less in noisy areas to get more // sharpness. Higher mask_multiplier gives less clamping, so less // ringing reduction. const constexpr float mask_multiplier = 2; float a = row_mask[x] * mask_multiplier; float clip_min = min - a; float clip_max = max + a; if (row_out[x] < clip_min) row_out[x] = clip_min; if (row_out[x] > clip_max) row_out[x] = clip_max;
}
}
}
// TODO(lode): move this to a separate file enc_downsample.cc
// Iteratively refines a 2x-downsampled image so that upsampling it with the
// decoder's kernel reproduces `orig` as closely as possible.
// NOTE(review): `down`, `up`, `corr` and `corr2` are used below but are not
// declared in the visible part of this function, and the body of the
// iteration loop looks truncated (the update of `down` from `corr2` is
// missing); their allocation and the remaining update steps appear to have
// been elided from this excerpt -- verify against the full source.
// (Fixed fused token `returntrue;` and the unbalanced iteration-loop brace.)
Status DownsampleImage2_Iterative(const ImageF& orig, ImageF* output) {
  int64_t xsize = orig.xsize();
  int64_t ysize = orig.ysize();
  int64_t xsize2 = DivCeil(orig.xsize(), 2);
  int64_t ysize2 = DivCeil(orig.ysize(), 2);
  JxlMemoryManager* memory_manager = orig.memory_manager();

  // Initial result image using the sharper downsampling.
  // Allocate extra space to avoid a reallocation when padding.
  JXL_ASSIGN_OR_RETURN(
      ImageF initial,
      ImageF::Create(memory_manager, DivCeil(orig.xsize(), 2) + kBlockDim,
                     DivCeil(orig.ysize(), 2) + kBlockDim));
  JXL_RETURN_IF_ERROR(initial.ShrinkTo(initial.xsize() - kBlockDim,
                                       initial.ysize() - kBlockDim));
  JXL_RETURN_IF_ERROR(DownsampleImage2_Sharper(orig, &initial));

  // In the weights map, relatively higher values will allow less ringing but
  // also less sharpness. With all constant values, it optimizes equally
  // everywhere. Even in this case, the weights2 computed from
  // this is still used and differs at the borders of the image.
  // TODO(lode): Make use of the weights field for anti-ringing and clamping,
  // the values are all set to 1 for now, but it is intended to be used for
  // reducing ringing based on the mask, and taking clamping into account.
  JXL_ASSIGN_OR_RETURN(ImageF weights,
                       ImageF::Create(memory_manager, xsize, ysize));
  for (size_t y = 0; y < weights.ysize(); y++) {
    auto* row = weights.Row(y);
    for (size_t x = 0; x < weights.xsize(); x++) {
      row[x] = 1;
    }
  }
  JXL_ASSIGN_OR_RETURN(ImageF weights2,
                       ImageF::Create(memory_manager, xsize2, ysize2));
  AntiUpsample(weights, &weights2);

  const size_t num_it = 3;
  for (size_t it = 0; it < num_it; ++it) {
    // Compute the upsampling error of the current estimate and distribute it
    // back onto the downsampled image.
    UpsampleImage(down, &up);
    JXL_ASSIGN_OR_RETURN(corr, LinComb<float>(1, orig, -1, up));
    JXL_RETURN_IF_ERROR(ElwiseMul(corr, weights, &corr));
    AntiUpsample(corr, &corr2);
    JXL_RETURN_IF_ERROR(ElwiseDiv(corr2, weights2, &corr2));
  }

  // can't just use CopyImage, because the output image was prepared with
  // padding.
  for (size_t y = 0; y < down.ysize(); y++) {
    for (size_t x = 0; x < down.xsize(); x++) {
      float v = down.Row(y)[x];
      output->Row(y)[x] = v;
    }
  }
  return true;
}
} // namespace
Status DownsampleImage2_Iterative(Image3F* opsin) {
JxlMemoryManager* memory_manager = opsin->memory_manager(); // Allocate extra space to avoid a reallocation when padding.
JXL_ASSIGN_OR_RETURN(
Image3F downsampled,
Image3F::Create(memory_manager, DivCeil(opsin->xsize(), 2) + kBlockDim,
DivCeil(opsin->ysize(), 2) + kBlockDim));
JXL_RETURN_IF_ERROR(downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
downsampled.ysize() - kBlockDim));
JXL_ASSIGN_OR_RETURN(
Image3F rgb,
Image3F::Create(memory_manager, opsin->xsize(), opsin->ysize()));
OpsinParams opsin_params; // TODO(user): use the ones that are actually used
opsin_params.Init(kDefaultIntensityTarget);
JXL_RETURN_IF_ERROR(
OpsinToLinear(*opsin, Rect(rgb), nullptr, &rgb, opsin_params));
// Compute an initial estimate of the quantization field. // Call InitialQuantField only in Hare mode or slower. Otherwise, rely // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon // mode. if (cparams.speed_tier > SpeedTier::kHare ||
cparams.disable_perceptual_optimizations) {
JXL_ASSIGN_OR_RETURN(initial_quant_field,
ImageF::Create(memory_manager, frame_dim.xsize_blocks,
frame_dim.ysize_blocks));
JXL_ASSIGN_OR_RETURN(initial_quant_masking,
ImageF::Create(memory_manager, frame_dim.xsize_blocks,
frame_dim.ysize_blocks)); float q = 0.79 / cparams.butteraugli_distance;
FillImage(q, &initial_quant_field); float masking = 1.0f / (q + 0.001f);
FillImage(masking, &initial_quant_masking); if (cparams.disable_perceptual_optimizations) {
JXL_ASSIGN_OR_RETURN(
initial_quant_masking1x1,
ImageF::Create(memory_manager, frame_dim.xsize, frame_dim.ysize));
FillImage(masking, &initial_quant_masking1x1);
}
quantizer.ComputeGlobalScaleAndQuant(quant_dc, q, 0);
} else { // Call this here, as it relies on pre-gaborish values. float butteraugli_distance_for_iqf = cparams.butteraugli_distance; if (!frame_header.loop_filter.gab) {
butteraugli_distance_for_iqf *= 0.62f;
}
JXL_ASSIGN_OR_RETURN(
initial_quant_field,
InitialQuantField(butteraugli_distance_for_iqf, *opsin, rect, pool,
1.0f, &initial_quant_masking,
&initial_quant_masking1x1)); float q = 0.39 / cparams.butteraugli_distance;
quantizer.ComputeGlobalScaleAndQuant(quant_dc, q, 0);
}
// TODO(veluca): do something about animations.
// Apply inverse-gaborish. if (frame_header.loop_filter.gab) { // Changing the weight here to 0.99f would help to reduce ringing in // generation loss. float weight[3] = {
1.0f,
1.0f,
1.0f,
};
JXL_RETURN_IF_ERROR(GaborishInverse(opsin, rect, weight, pool));
}
if (initialize_global_state) {
JXL_RETURN_IF_ERROR(FindBestDequantMatrices(
memory_manager, cparams, modular_frame_encoder, &matrices));
}
auto process_tile = [&](const uint32_t tid, const size_t thread) -> Status {
size_t n_enc_tiles = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
size_t tx = tid % n_enc_tiles;
size_t ty = tid / n_enc_tiles;
size_t by0 = ty * kEncTileDimInBlocks;
size_t by1 =
std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
size_t bx0 = tx * kEncTileDimInBlocks;
size_t bx1 =
std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
Rect r(bx0, by0, bx1 - bx0, by1 - by0);
// For speeds up to Wombat, we only compute the color correlation map // once we know the transform type and the quantization map. if (cparams.speed_tier <= SpeedTier::kSquirrel) {
JXL_RETURN_IF_ERROR(cfl_heuristics.ComputeTile(
r, *opsin, rect, matrices, /*ac_strategy=*/nullptr, /*raw_quant_field=*/nullptr, /*quantizer=*/nullptr, /*fast=*/false, thread, &cmap));
}
// Always set the initial quant field, so we can compute the CfL map with // more accuracy. The initial quant field might change in slower modes, but // adjusting the quant field with butteraugli when all the other encoding // parameters are fixed is likely a more reliable choice anyway.
JXL_RETURN_IF_ERROR(AdjustQuantField(
ac_strategy, r, cparams.butteraugli_distance, &initial_quant_field));
quantizer.SetQuantFieldRect(initial_quant_field, r, &raw_quant_field);
// Compute a non-default CfL map if we are at Hare speed, or slower. if (cparams.speed_tier <= SpeedTier::kHare) {
JXL_RETURN_IF_ERROR(cfl_heuristics.ComputeTile(
r, *opsin, rect, matrices, &ac_strategy, &raw_quant_field, &quantizer, /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &cmap));
} returntrue;
};
size_t num_tiles = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks); constauto prepare = [&](const size_t num_threads) -> Status {
JXL_RETURN_IF_ERROR(acs_heuristics.PrepareForThreads(num_threads));
JXL_RETURN_IF_ERROR(cfl_heuristics.PrepareForThreads(num_threads)); returntrue;
};
JXL_RETURN_IF_ERROR(
RunOnPool(pool, 0, num_tiles, prepare, process_tile, "Enc Heuristics"));
// Choose a context model that depends on the amount of quantization for AC. if (cparams.speed_tier < SpeedTier::kFalcon && initialize_global_state) {
FindBestBlockEntropyModel(cparams, raw_quant_field, ac_strategy,
&block_ctx_map);
} returntrue;
}
} // namespace jxl
// ---------------------------------------------------------------------------
// Trailing text from the web page this file was extracted from (not code):
// Messung V0.5
// - Dauer der Verarbeitung: 0.9 Sekunden
//   (vorverarbeitet)
// - Die Informationen auf dieser Webseite wurden
//   nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
//   noch Qualität der bereitgestellten Informationen zugesichert.
// Bemerkung:
// Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.