// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
// These templates are not found via ADL. using hwy::HWY_NAMESPACE::Clamp; using hwy::HWY_NAMESPACE::Mul; using hwy::HWY_NAMESPACE::NearestInt;
// TODO(jon): check if this can be replaced by a FloatToU16 function void FloatToU32(constfloat* in, uint32_t* out, size_t num, float mul,
size_t bits_per_sample) { const HWY_FULL(float) d; const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du;
// Unpoison accessing partially-uninitialized vectors with memory sanitizer. // This is because we run NearestInt() on the vector, which triggers MSAN even // it is safe to do so since the values are not mixed between lanes. const size_t num_round_up = RoundUpTo(num, Lanes(d));
msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
constauto one = Set(d, 1.0f); constauto scale = Set(d, mul); for (size_t x = 0; x < num; x += Lanes(d)) { auto v = Load(d, in + x); // Clamp turns NaN to 'min'.
v = Clamp(v, Zero(d), one); auto i = NearestInt(Mul(v, scale));
Store(BitCast(du, i), du, out + x);
}
// Poison back the output.
msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
}
void FloatToF16(constfloat* in, hwy::float16_t* out, size_t num) { const HWY_FULL(float) d; const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(d)> du;
// Unpoison accessing partially-uninitialized vectors with memory sanitizer. // This is because we run DemoteTo() on the vector which triggers msan. const size_t num_round_up = RoundUpTo(num, Lanes(d));
msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
for (size_t x = 0; x < num; x += Lanes(d)) { auto v = Load(d, in + x); auto v16 = DemoteTo(du, v);
Store(v16, du, out + x);
}
// Poison back the output.
msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
}
std::vector<std::vector<uint8_t>> row_out_callback; constauto FreeCallbackOpaque = [&out_callback](void* p) {
out_callback.destroy(p);
};
std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque(
nullptr, FreeCallbackOpaque); auto InitOutCallback = [&](size_t num_threads) -> Status { if (out_callback.IsPresent()) {
out_run_opaque.reset(out_callback.Init(num_threads, stride));
JXL_RETURN_IF_ERROR(out_run_opaque != nullptr);
row_out_callback.resize(num_threads); for (size_t i = 0; i < num_threads; ++i) {
row_out_callback[i].resize(stride);
}
} returntrue;
};
// Channels used to store the transformed original channels if needed.
ImageF temp_channels[kConvertMaxChannels]; if (undo_orientation != Orientation::kIdentity) { for (size_t c = 0; c < num_channels; ++c) { if (channels[c]) {
JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c],
temp_channels[c], pool));
channels[c] = &(temp_channels[c]);
}
}
}
// First channel may not be nullptr.
size_t xsize = channels[0]->xsize();
size_t ysize = channels[0]->ysize(); if (stride < bytes_per_pixel * xsize) { return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS " vs %" PRIuS,
stride, bytes_per_pixel * xsize);
} if (!out_callback.IsPresent() &&
out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) { return JXL_FAILURE("out_size is too small to store image");
}
// Handle the case where a channel is nullptr by creating a single row with // ones to use instead.
ImageF ones; for (size_t c = 0; c < num_channels; ++c) { if (!channels[c]) {
JXL_ASSIGN_OR_RETURN(ones, ImageF::Create(memory_manager, xsize, 1));
FillImage(1.0f, &ones); break;
}
}
if (float_out) { if (bits_per_sample == 16) { bool swap_endianness = little_endian != IsLittleEndian();
Plane<hwy::float16_t> f16_cache; constauto init_cache = [&](size_t num_threads) -> Status {
JXL_ASSIGN_OR_RETURN(
f16_cache, Plane<hwy::float16_t>::Create(
memory_manager, xsize, num_channels * num_threads));
JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); returntrue;
}; constauto process_row = [&](const uint32_t task, const size_t thread) -> Status { const int64_t y = task; constfloat* JXL_RESTRICT row_in[kConvertMaxChannels]; for (size_t c = 0; c < num_channels; c++) {
row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
}
hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels]; for (size_t c = 0; c < num_channels; c++) {
row_f16[c] = f16_cache.Row(c + thread * num_channels);
HWY_DYNAMIC_DISPATCH(FloatToF16)
(row_in[c], row_f16[c], xsize);
}
uint8_t* row_out =
out_callback.IsPresent()
? row_out_callback[thread].data()
: &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; // interleave the one scanline
hwy::float16_t* row_f16_out = reinterpret_cast<hwy::float16_t*>(row_out); for (size_t x = 0; x < xsize; x++) { for (size_t c = 0; c < num_channels; c++) {
row_f16_out[x * num_channels + c] = row_f16[c][x];
}
} if (swap_endianness) {
size_t size = xsize * num_channels * 2; for (size_t i = 0; i < size; i += 2) {
std::swap(row_out[i + 0], row_out[i + 1]);
}
} if (out_callback.IsPresent()) {
out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
} returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
init_cache, process_row, "ConvertF16"));
} elseif (bits_per_sample == 32) { constauto init_cache = [&](size_t num_threads) -> Status {
JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); returntrue;
}; constauto process_row = [&](const uint32_t task, const size_t thread) -> Status { const int64_t y = task;
uint8_t* row_out =
out_callback.IsPresent()
? row_out_callback[thread].data()
: &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; constfloat* JXL_RESTRICT row_in[kConvertMaxChannels]; for (size_t c = 0; c < num_channels; c++) {
row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
} if (little_endian) {
StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, row_out);
} else {
StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, row_out);
} if (out_callback.IsPresent()) {
out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
} returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
init_cache, process_row, "ConvertFloat"));
} else { return JXL_FAILURE("float other than 16-bit and 32-bit not supported");
}
} else { // Multiplier to convert from floating point 0-1 range to the integer // range. float mul = (1ull << bits_per_sample) - 1;
Plane<uint32_t> u32_cache; constauto init_cache = [&](size_t num_threads) -> Status {
JXL_ASSIGN_OR_RETURN(u32_cache,
Plane<uint32_t>::Create(memory_manager, xsize,
num_channels * num_threads));
JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); returntrue;
}; constauto process_row = [&](const uint32_t task, const size_t thread) -> Status { const int64_t y = task;
uint8_t* row_out =
out_callback.IsPresent()
? row_out_callback[thread].data()
: &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; constfloat* JXL_RESTRICT row_in[kConvertMaxChannels]; for (size_t c = 0; c < num_channels; c++) {
row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
}
uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels]; for (size_t c = 0; c < num_channels; c++) {
row_u32[c] = u32_cache.Row(c + thread * num_channels); // row_u32[] is a per-thread temporary row storage, this isn't // intended to be initialized on a previous run.
msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0]));
HWY_DYNAMIC_DISPATCH(FloatToU32)
(row_in[c], row_u32[c], xsize, mul, bits_per_sample);
} if (bits_per_sample <= 8) {
StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, row_out);
} else { if (little_endian) {
StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, row_out);
} else {
StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, row_out);
}
} if (out_callback.IsPresent()) {
out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
} returntrue;
};
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
init_cache, process_row, "ConvertUint"));
} returntrue;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.