// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
constauto factor_v = Set(df, factor); for (size_t x = 0; x < xsize; x += Lanes(di)) { constauto in = Load(di, row_in + x); constauto out = Mul(ConvertTo(df, in), factor_v);
Store(out, df, row_out + x);
}
} // NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace jxl
HWY_AFTER_NAMESPACE();
#if HWY_ONCE namespace jxl {
// Export the per-target SIMD kernels so that the code below can invoke them
// through HWY_DYNAMIC_DISPATCH(MultiplySum / RgbFromSingle / SingleFromSingle).
// NOTE(review): the kernels themselves are presumably defined in the
// HWY_NAMESPACE section that precedes this point - confirm.
HWY_EXPORT(MultiplySum); // Local function
HWY_EXPORT(RgbFromSingle); // Local function
HWY_EXPORT(SingleFromSingle); // Local function
// Slow conversion using double precision multiplication, only // needed when the bit depth is too high for single precision void SingleFromSingleAccurate(const size_t xsize, const pixel_type* const JXL_RESTRICT row_in, constdouble factor, float* row_out) { for (size_t x = 0; x < xsize; x++) {
row_out[x] = row_in[x] * factor;
}
}
// Expands a row of custom floating point samples, stored one per pixel_type
// integer, into binary32 floats.
//
// The custom format is `bits` wide: one sign bit on top, followed by
// `exp_bits` exponent bits and `bits - exp_bits - 1` mantissa bits.
//
//   row_in   - source samples holding the raw custom-float bit patterns
//   row_out  - destination row, receives `xsize` binary32 values
//   xsize    - number of samples to convert
//   bits     - total width of the custom format
//   exp_bits - number of exponent bits in the custom format
//
// Returns true on success. Fails (via JXL_ENSURE) if bits == 32 while
// exp_bits != 8, or if a re-biased exponent turns out negative.
Status int_to_float(const pixel_type* const JXL_RESTRICT row_in,
                    float* const JXL_RESTRICT row_out, const size_t xsize,
                    const int bits, const int exp_bits) {
  static_assert(sizeof(pixel_type) == sizeof(float));

  // 32-bit input is already laid out as binary32: copy the bits verbatim.
  if (bits == 32) {
    JXL_ENSURE(exp_bits == 8);
    memcpy(row_out, row_in, xsize * sizeof(float));
    return true;
  }

  const int sign_pos = bits - 1;
  const int mantissa_width = bits - exp_bits - 1;
  // Left shift that aligns the custom mantissa with the 23-bit binary32 one.
  const int mantissa_align = 23 - mantissa_width;
  const int source_bias = (1 << (exp_bits - 1)) - 1;
  // Subnormals can only be normalized if binary32 has spare exponent range.
  const bool can_normalize = exp_bits < 8;

  for (size_t i = 0; i != xsize; ++i) {
    uint32_t raw;
    memcpy(&raw, &row_in[i], 4);

    const bool negative = (raw >> sign_pos) != 0;
    raw &= (1 << sign_pos) - 1;  // strip the sign, keep exponent + mantissa

    // Signed zero needs no further decoding.
    if (raw == 0) {
      row_out[i] = negative ? -0.f : 0.f;
      continue;
    }

    int exponent = (raw >> mantissa_width);
    int fraction = (raw & ((1 << mantissa_width) - 1));
    fraction <<= mantissa_align;

    if (can_normalize && exponent == 0) {
      // Subnormal input: shift until the leading one reaches bit 23,
      // lowering the exponent by one for each step.
      while ((fraction & 0x800000) == 0) {
        fraction <<= 1;
        exponent--;
      }
      exponent++;
      // The leading one is implicit in a normalized binary32 value.
      fraction &= 0x7fffff;
    }

    // Swap the source exponent bias for the binary32 bias (127).
    exponent = exponent - source_bias + 127;
    JXL_ENSURE(exponent >= 0);

    // Reassemble sign | exponent | mantissa.
    raw = (negative ? 0x80000000 : 0);
    raw |= (exponent << 23);
    raw |= fraction;
    memcpy(&row_out[i], &raw, 4);
  }
  return true;
}
JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s",
gi.DebugString().c_str());
ModularOptions options;
options.max_chan_size = frame_dim.group_dim;
options.group_dim = frame_dim.group_dim;
Status dec_status = ModularGenericDecompress(
reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim),
&options, /*undo_transforms=*/false, &tree, &code, &context_map,
allow_truncated_group); if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status); if (dec_status.IsFatalError()) { return JXL_FAILURE("Failed to decode global modular info");
}
// TODO(eustas): are we sure this can be done after partial decode?
have_something = false; for (size_t c = 0; c < gi.channel.size(); c++) {
Channel& gic = gi.channel[c]; if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim &&
gic.h <= frame_dim.group_dim)
have_something = true;
} // move global transforms to groups if possible if (!have_something && all_same_shift) { if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) {
global_transform = gi.transform;
gi.transform.clear(); // TODO(jon): also move no-delta-palette out (trickier though)
}
}
full_image = std::move(gi);
JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s",
full_image.DebugString().c_str()); return dec_status;
}
// Stops using `full_image` as decode target when it is not needed: i.e. when
// it carries no transforms, `have_something` is false and `all_same_shift` is
// true. In that case `use_full_image` is cleared and the pixel planes of all
// channels are released, while the channel metadata is retained.
void ModularFrameDecoder::MaybeDropFullImage() {
  const bool can_drop =
      full_image.transform.empty() && !have_something && all_same_shift;
  if (!can_drop) return;

  use_full_image = false;
  JXL_DEBUG_V(6, "Dropping full image");
  // Replace each plane with an empty one; everything else about the channel
  // stays in place.
  for (size_t i = 0; i < full_image.channel.size(); ++i) {
    full_image.channel[i].plane = Plane<pixel_type>();
  }
}
// Decodes (or zero-fills) the part of the modular image covered by one group.
//
// Only channels that are larger than a group in at least one dimension and
// whose min(hshift, vshift) lies within [minShift, maxShift] take part.
//
//   frame_header          - header of the frame being decoded
//   rect                  - group area in unshifted image coordinates
//   reader                - bit source for the group stream
//   minShift, maxShift    - inclusive shift bracket of channels to handle
//   stream                - stream id; must be of kind ModularDC or ModularAC
//   zerofill              - if true, nothing is read and samples are zeroed
//   dec_state             - decoder state, forwarded to
//                           ModularImageToDecodedRect
//   render_pipeline_input - must be non-null when `use_full_image` is false
//   allow_truncated       - tolerate non-fatal decode errors
//   should_run_pipeline   - optional; set to false when there was nothing to
//                           decode although color or extra channels exist
//
// Returns true on success, otherwise the failing status.
Status ModularFrameDecoder::DecodeGroup(
    const FrameHeader& frame_header, const Rect& rect, BitReader* reader,
    int minShift, int maxShift, const ModularStreamId& stream, bool zerofill,
    PassesDecoderState* dec_state, RenderPipelineInput* render_pipeline_input,
    bool allow_truncated, bool* should_run_pipeline) {
  JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s",
              stream.DebugString().c_str(), Description(rect).c_str(), minShift,
              maxShift, zerofill ? "using zerofill" : "");
  JXL_ENSURE(stream.kind == ModularStreamId::Kind::ModularDC ||
             stream.kind == ModularStreamId::Kind::ModularAC);

  JXL_ASSIGN_OR_RETURN(
      Image group_image,
      Image::Create(memory_manager_, rect.xsize(), rect.ysize(),
                    full_image.bitdepth, 0));

  // True if the channel's shift falls inside the requested bracket.
  const auto in_shift_bracket = [&](const Channel& fc) {
    const int shift = std::min(fc.hshift, fc.vshift);
    return shift <= maxShift && shift >= minShift;
  };
  // Area of `rect` expressed in the (possibly subsampled) channel coordinates.
  const auto rect_in_channel = [&](const Channel& fc) {
    return Rect(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
                rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w,
                fc.h);
  };

  const size_t num_channels = full_image.channel.size();

  // Locate the first non-meta channel that exceeds the group size.
  size_t first_large = full_image.nb_meta_channels;
  while (first_large < num_channels) {
    const Channel& fc = full_image.channel[first_large];
    if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
    ++first_large;
  }

  // With zerofill and a full image, the target area is cleared in place and no
  // per-group channels are needed.
  const bool clear_in_place = zerofill && use_full_image;

  for (size_t idx = first_large; idx < num_channels; ++idx) {
    Channel& fc = full_image.channel[idx];
    if (!in_shift_bracket(fc)) continue;
    Rect r = rect_in_channel(fc);
    if (r.xsize() == 0 || r.ysize() == 0) continue;

    if (clear_in_place) {
      for (size_t y = 0; y < r.ysize(); ++y) {
        pixel_type* const JXL_RESTRICT row = r.Row(&fc.plane, y);
        memset(row, 0, r.xsize() * sizeof(*row));
      }
      continue;
    }

    // Otherwise set up a group-local channel with matching subsampling.
    JXL_ASSIGN_OR_RETURN(
        Channel group_channel,
        Channel::Create(memory_manager_, r.xsize(), r.ysize()));
    if (zerofill) ZeroFillImage(&group_channel.plane);
    group_channel.hshift = fc.hshift;
    group_channel.vshift = fc.vshift;
    group_image.channel.emplace_back(std::move(group_channel));
  }

  if (clear_in_place) return true;

  // Return early if there's nothing to decode. Otherwise there might be
  // problems later (in ModularImageToDecodedRect).
  if (group_image.channel.empty()) {
    if (dec_state && should_run_pipeline) {
      const auto* metadata = frame_header.nonserialized_metadata;
      if (do_color || metadata->m.num_extra_channels > 0) {
        // Signal to FrameDecoder that we do not have some of the required
        // input for the render pipeline.
        *should_run_pipeline = false;
      }
    }
    JXL_DEBUG_V(6, "Nothing to decode, returning early.");
    return true;
  }

  ModularOptions options;
  if (!zerofill) {
    Status status = ModularGenericDecompress(
        reader, group_image, /*header=*/nullptr, stream.ID(frame_dim),
        &options, /*undo_transforms=*/true, &tree, &code, &context_map,
        allow_truncated);
    // Any error is reported unless truncation is tolerated; fatal errors are
    // reported in either case.
    if (!allow_truncated) {
      JXL_RETURN_IF_ERROR(status);
    }
    if (status.IsFatalError()) return status;
  }

  if (!use_full_image) {
    // No full image: undo the global transforms that were pushed down to the
    // group level and hand the result to the render pipeline directly.
    JXL_ENSURE(render_pipeline_input);
    for (const auto& transform : global_transform) {
      JXL_RETURN_IF_ERROR(
          transform.Inverse(group_image, global_header.wp_header));
    }
    JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(
        frame_header, group_image, dec_state, nullptr, *render_pipeline_input,
        Rect(0, 0, group_image.w, group_image.h)));
    return true;
  }

  // Full image in use: copy every decoded group channel into its place. The
  // channels are visited in the same order in which they were created above.
  size_t next_decoded = 0;
  for (size_t idx = first_large; idx < num_channels; ++idx) {
    Channel& fc = full_image.channel[idx];
    if (!in_shift_bracket(fc)) continue;
    Rect r = rect_in_channel(fc);
    if (r.xsize() == 0 || r.ysize() == 0) continue;
    JXL_ENSURE(use_full_image);
    JXL_RETURN_IF_ERROR(
        CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()),
                    /*from=*/group_image.channel[next_decoded].plane,
                    /*rect_to=*/r, /*to=*/&fc.plane));
    ++next_decoded;
  }
  return true;
}
// Decodes the three-channel modular image that carries the VarDCT DC
// coefficients of one DC group and dequantizes it into the shared DC storage.
//
//   frame_header - provides the chroma subsampling of the frame
//   group_id     - index of the DC group to decode
//   reader       - bit source; two bits of "extra precision" are read first
//   dec_state    - decoder state providing memory manager, frame dimensions,
//                  quantizer, color map and DC storage
//
// Returns true on success; fails if image allocation, channel shrinking or
// the modular decode fails.
Status ModularFrameDecoder::DecodeVarDCTDC(const FrameHeader& frame_header,
                                           size_t group_id, BitReader* reader,
                                           PassesDecoderState* dec_state) {
  JxlMemoryManager* memory_manager = dec_state->memory_manager();
  const Rect dc_rect = dec_state->shared->frame_dim.DCGroupRect(group_id);
  JXL_DEBUG_V(6, "Decoding VarDCT DC with rect %s",
              Description(dc_rect).c_str());

  // TODO(eustas): investigate if we could reduce the impact of
  //               EvalRationalPolynomial; generally speaking, the limit is
  //               2**(128/(3*magic)), where 128 comes from IEEE 754 exponent,
  //               3 comes from XybToRgb that cubes the values, and "magic" is
  //               the sum of all other contributions. 2**18 is known to lead
  //               to NaN on input found by fuzzing (see commit message).
  JXL_ASSIGN_OR_RETURN(
      Image dc_image,
      Image::Create(memory_manager, dc_rect.xsize(), dc_rect.ysize(),
                    full_image.bitdepth, 3));
  const size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim);

  reader->Refill();
  const size_t extra_precision = reader->ReadFixedBits<2>();
  // Each extra precision bit halves the dequantization multiplier.
  const float dc_scale = 1.0f / (1 << extra_precision);

  ModularOptions options;
  for (size_t c = 0; c < 3; c++) {
    // Components 0 and 1 are stored in swapped channel order.
    const size_t channel_index = (c < 2) ? (1 - c) : c;
    Channel& ch = dc_image.channel[channel_index];
    ch.w >>= frame_header.chroma_subsampling.HShift(c);
    ch.h >>= frame_header.chroma_subsampling.VShift(c);
    JXL_RETURN_IF_ERROR(ch.shrink());
  }

  if (!ModularGenericDecompress(reader, dc_image, /*header=*/nullptr,
                                stream_id, &options, /*undo_transforms=*/true,
                                &tree, &code, &context_map)) {
    return JXL_FAILURE("Failed to decode VarDCT DC group (DC group id %d)",
                       static_cast<int>(group_id));
  }

  DequantDC(dc_rect, &dec_state->shared_storage.dc_storage,
            &dec_state->shared_storage.quant_dc, dc_image,
            dec_state->shared->quantizer.MulDC(), dc_scale,
            dec_state->shared->cmap.base().DCFactors(),
            frame_header.chroma_subsampling, dec_state->shared->block_ctx_map);
  return true;
}
size_t c = 0; if (do_color) { constbool rgb_from_gray =
metadata->m.color_encoding.IsGray() &&
frame_header.color_transform == ColorTransform::kNone; constbool fp = metadata->m.bit_depth.floating_point_sample &&
frame_header.color_transform != ColorTransform::kXYB; for (; c < 3; c++) { double factor = full_image.bitdepth < 32
? 1.0 / ((1u << full_image.bitdepth) - 1)
: 0;
size_t c_in = c; if (frame_header.color_transform == ColorTransform::kXYB) {
factor = dec_state->shared->matrices.DCQuants()[c]; // XYB is encoded as YX(B-Y) if (c < 2) c_in = 1 - c;
} elseif (rgb_from_gray) {
c_in = 0;
}
JXL_ENSURE(c_in < gi.channel.size());
Channel& ch_in = gi.channel[c_in]; // TODO(eustas): could we detect it on earlier stage? if (ch_in.w == 0 || ch_in.h == 0) { return JXL_FAILURE("Empty image");
}
JXL_ENSURE(ch_in.hshift <= 3 && ch_in.vshift <= 3);
Rect r = render_pipeline_input.GetBuffer(c).second;
Rect mr(modular_rect.x0() >> ch_in.hshift,
modular_rect.y0() >> ch_in.vshift,
DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
mr = mr.Crop(ch_in.plane);
size_t xsize_shifted = r.xsize();
size_t ysize_shifted = r.ysize(); if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS "x%" PRIuS " modular channel into " "a %" PRIuS "x%" PRIuS " rect",
mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
} if (frame_header.color_transform == ColorTransform::kXYB && c == 2) {
JXL_ENSURE(!fp); constauto process_row = [&](const uint32_t task,
size_t /* thread */) -> Status { const size_t y = task; const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); const pixel_type* const JXL_RESTRICT row_in_Y =
mr.Row(&gi.channel[0].plane, y); float* const JXL_RESTRICT row_out = get_row(c, y);
HWY_DYNAMIC_DISPATCH(MultiplySum)
(xsize_shifted, row_in, row_in_Y, factor, row_out); returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
ThreadPool::NoInit, process_row, "ModularIntToFloat"));
} elseif (fp) { int bits = metadata->m.bit_depth.bits_per_sample; int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample; constauto process_row = [&](const uint32_t task,
size_t /* thread */) -> Status { const size_t y = task; const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); if (rgb_from_gray) { for (size_t cc = 0; cc < 3; cc++) { float* const JXL_RESTRICT row_out = get_row(cc, y);
JXL_RETURN_IF_ERROR(
int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits));
}
} else { float* const JXL_RESTRICT row_out = get_row(c, y);
JXL_RETURN_IF_ERROR(
int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits));
} returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
ThreadPool::NoInit, process_row, "ModularIntToFloat_losslessfloat"));
} else { constauto process_row = [&](const uint32_t task,
size_t /* thread */) -> Status { const size_t y = task; const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); if (rgb_from_gray) { if (full_image.bitdepth < 23) {
HWY_DYNAMIC_DISPATCH(RgbFromSingle)
(xsize_shifted, row_in, factor, get_row(0, y), get_row(1, y),
get_row(2, y));
} else {
SingleFromSingleAccurate(xsize_shifted, row_in, factor,
get_row(0, y));
SingleFromSingleAccurate(xsize_shifted, row_in, factor,
get_row(1, y));
SingleFromSingleAccurate(xsize_shifted, row_in, factor,
get_row(2, y));
}
} else { float* const JXL_RESTRICT row_out = get_row(c, y); if (full_image.bitdepth < 23) {
HWY_DYNAMIC_DISPATCH(SingleFromSingle)
(xsize_shifted, row_in, factor, row_out);
} else {
SingleFromSingleAccurate(xsize_shifted, row_in, factor, row_out);
}
} returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
ThreadPool::NoInit, process_row, "ModularIntToFloat"));
} if (rgb_from_gray) { break;
}
} if (rgb_from_gray) {
c = 1;
}
}
size_t num_extra_channels = metadata->m.num_extra_channels; for (size_t ec = 0; ec < num_extra_channels; ec++, c++) { const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec]; int bits = eci.bit_depth.bits_per_sample; int exp_bits = eci.bit_depth.exponent_bits_per_sample; bool fp = eci.bit_depth.floating_point_sample;
JXL_ENSURE(fp || bits < 32); constdouble factor = fp ? 0 : (1.0 / ((1u << bits) - 1));
JXL_ENSURE(c < gi.channel.size());
Channel& ch_in = gi.channel[c]; constauto& buffer = render_pipeline_input.GetBuffer(3 + ec);
Rect r = buffer.second;
Rect mr(modular_rect.x0() >> ch_in.hshift,
modular_rect.y0() >> ch_in.vshift,
DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
mr = mr.Crop(ch_in.plane); if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS "x%" PRIuS " modular channel into " "a %" PRIuS "x%" PRIuS " rect",
mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
} for (size_t y = 0; y < r.ysize(); ++y) { float* const JXL_RESTRICT row_out = r.Row(buffer.first, y); const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); if (fp) {
JXL_RETURN_IF_ERROR(
int_to_float(row_in, row_out, r.xsize(), bits, exp_bits));
} else { if (full_image.bitdepth < 23) {
HWY_DYNAMIC_DISPATCH(SingleFromSingle)
(r.xsize(), row_in, factor, row_out);
} else {
SingleFromSingleAccurate(r.xsize(), row_in, factor, row_out);
}
}
}
} returntrue;
}
Status ModularFrameDecoder::FinalizeDecoding(const FrameHeader& frame_header,
PassesDecoderState* dec_state,
jxl::ThreadPool* pool, bool inplace) { if (!use_full_image) returntrue;
JxlMemoryManager* memory_manager = dec_state->memory_manager();
Image gi{memory_manager}; if (inplace) {
gi = std::move(full_image);
} else {
JXL_ASSIGN_OR_RETURN(gi, Image::Clone(full_image));
}
size_t xsize = gi.w;
size_t ysize = gi.h;
JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s",
gi.DebugString().c_str());
// Don't use threads if total image size is smaller than a group if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr;
// Undo the global transforms
gi.undo_transforms(global_header.wp_header, pool);
JXL_ENSURE(global_transform.empty()); if (gi.error) return JXL_FAILURE("Undoing transforms failed");
for (size_t i = 0; i < dec_state->shared->frame_dim.num_groups; i++) {
dec_state->render_pipeline->ClearDone(i);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.