// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
// These templates are not found via ADL. using hwy::HWY_NAMESPACE::Add; using hwy::HWY_NAMESPACE::Broadcast; using hwy::HWY_NAMESPACE::Mul; using hwy::HWY_NAMESPACE::MulAdd; using hwy::HWY_NAMESPACE::Sub;
// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including // the gamma mixing and simple gamma). Avoids clamping to [0, 1] - out of (sRGB) // gamut values may be in-gamut after transforming to a wider space. // "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries // of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its // entries by c is equivalent to multiplying linear_* by c afterwards. template <class D, class V>
HWY_INLINE HWY_MAYBE_UNUSED void XybToRgb(D d, const V opsin_x, const V opsin_y, const V opsin_b, const OpsinParams& opsin_params,
V* const HWY_RESTRICT linear_r,
V* const HWY_RESTRICT linear_g,
V* const HWY_RESTRICT linear_b) { #if HWY_TARGET == HWY_SCALAR constauto neg_bias_r = Set(d, opsin_params.opsin_biases[0]); constauto neg_bias_g = Set(d, opsin_params.opsin_biases[1]); constauto neg_bias_b = Set(d, opsin_params.opsin_biases[2]); #else constauto neg_bias_rgb = LoadDup128(d, opsin_params.opsin_biases); constauto neg_bias_r = Broadcast<0>(neg_bias_rgb); constauto neg_bias_g = Broadcast<1>(neg_bias_rgb); constauto neg_bias_b = Broadcast<2>(neg_bias_rgb); #endif
// Color space: XYB -> RGB auto gamma_r = Add(opsin_y, opsin_x); auto gamma_g = Sub(opsin_y, opsin_x); auto gamma_b = opsin_b;
inline HWY_MAYBE_UNUSED Status FastXYBTosRGB8(constfloat* input[4],
uint8_t* output, bool is_rgba,
size_t xsize) { // This function is very NEON-specific. As such, it uses intrinsics directly. #if HWY_TARGET == HWY_NEON // WARNING: doing fixed point arithmetic correctly is very complicated. // Changes to this function should be thoroughly tested.
// Note that the input is assumed to have 13 bits of mantissa, and the output // will have 14 bits. auto srgb_tf = [&](int16x8_t v16) {
int16x8_t clz = vclzq_s16(v16); // Convert to [0.25, 0.5) range.
int16x8_t v025_05_16 = vqshlq_s16(v16, vqsubq_s16(clz, vdupq_n_s16(2)));
// third degree polynomial approximation between 0.25 and 0.5 // of 1.055/2^(7/2.4) * x^(1/2.4) / 32. // poly ~ ((0.95x-1.75)*x+1.72)*x+0.29 // We actually compute ~ ((0.47x-0.87)*x+0.86)*(2x)+0.29 as 1.75 and 1.72 // overflow our fixed point representation.
int16x8_t exp16 = vsubq_s16(vdupq_n_s16(11), clz); // Compute 2**(1/2.4*exp16)/32. Values of exp16 that would overflow are // capped to 1. // Generated with the following Python script: // a = [] // b = [] // // for i in range(0, 16): // v = 2**(5/12.*i) // v /= 16 // v *= 256 * 128 // v = int(v) // a.append(v // 256) // b.append(v % 256) // // print(", ".join("0x%02x" % x for x in a)) // // print(", ".join("0x%02x" % x for x in b))
int16x8_t gamma_b16 = vqsubq_s16(opsin_b16, neg_bias_cbrt16);
int16x8_t gamma_bsq16 = vqrdmulhq_s16(gamma_b16, gamma_b16);
int16x8_t gamma_bcb16 = vqrdmulhq_s16(gamma_bsq16, gamma_b16);
int16x8_t mixed_b16 = vqaddq_s16(gamma_bcb16, neg_bias16); // mixed_rpg and mixed_b are in 0-1 range. // mixed_rmg has a smaller range (-0.035 to 0.035 for valid sRGB). Note // that at this point it is already multiplied by 8.
// We multiply all the mixed values by 1/4 (i.e. shift them to 13-bit // fixed point) to ensure intermediate quantities are in range. Note that // r-g is not shifted, and was x8 before here; this corresponds to a x32 // overall multiplicative factor and ensures that all the matrix constants // are in 0-1 range. // Similarly, mixed_rpg16 is already multiplied by 1/4 because of the two // vhadd + using neg_bias_half.
mixed_b16 = vshrq_n_s16(mixed_b16, 2);
// Unmix (multiply by 3x3 inverse_matrix) // For increased precision, we use a matrix for converting from // ((mixed_r - mixed_g)/2, (mixed_r + mixed_g)/2, mixed_b) to rgb. This // avoids cancellation effects when computing (y+x)^3-(y-x)^3. // We compute mixed_rpg - mixed_b because the (1+c)*mixed_rpg - c * // mixed_b pattern is repeated frequently in the code below. This allows // us to save a multiply per channel, and removes the presence of // some constants above 1. Moreover, mixed_rpg - mixed_b is in (-1, 1) // range, so the subtraction is safe. // All the magic-looking constants here are derived by computing the // inverse opsin matrix for the transformation modified as described // above.
// Precomputation common to multiple color values.
int16x8_t mixed_rpgmb16 = vqsubq_s16(mixed_rpg16, mixed_b16);
int16x8_t mixed_rpgmb_times_016 = vqrdmulhq_n_s16(mixed_rpgmb16, 5394);
int16x8_t mixed_rg16 = vqaddq_s16(mixed_rpgmb_times_016, mixed_rpg16);
// R
int16x8_t linear_r16 =
vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, 21400));
// G
int16x8_t linear_g16 =
vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, -7857));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.