Anforderungen  |   Konzepte  |   Entwurf  |   Entwicklung  |   Qualitätssicherung  |   Lebenszyklus  |   Steuerung
 
 
 
 


Quelle  dec_group.cc   Sprache: C

 
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/dec_group.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>

#include "lib/jxl/chroma_from_luma.h"
#include "lib/jxl/frame_header.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/dec_group.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_context.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/rect.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/common.h"  // kMaxNumPasses
#include "lib/jxl/dec_cache.h"
#include "lib/jxl/dec_transforms-inl.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/entropy_coder.h"
#include "lib/jxl/quant_weights.h"
#include "lib/jxl/quantizer-inl.h"
#include "lib/jxl/quantizer.h"

#ifndef LIB_JXL_DEC_GROUP_CC
#define LIB_JXL_DEC_GROUP_CC
namespace jxl {

struct AuxOut;

// Interface for reading groups for DecodeGroupImpl.
class GetBlock {
 public:
  virtual void StartRow(size_t by) = 0;
  virtual Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs,
                           size_t size, size_t log2_covered_blocks,
                           ACPtr block[3], ACType ac_type) = 0;
  virtual ~GetBlock() {}
};

// Controls whether DecodeGroupImpl renders to pixels or not.
enum DrawMode {
  // Render to pixels.
  kDraw = 0,
  // Don't render to pixels.
  kDontDraw = 1,
};

}  // namespace jxl
#endif  // LIB_JXL_DEC_GROUP_CC

HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::AllFalse;
using hwy::HWY_NAMESPACE::Gt;
using hwy::HWY_NAMESPACE::Le;
using hwy::HWY_NAMESPACE::MaskFromVec;
using hwy::HWY_NAMESPACE::Or;
using hwy::HWY_NAMESPACE::Rebind;
using hwy::HWY_NAMESPACE::ShiftRight;

using D = HWY_FULL(float);
using DU = HWY_FULL(uint32_t);
using DI = HWY_FULL(int32_t);
using DI16 = Rebind<int16_t, DI>;
using DI16_FULL = HWY_CAPPED(int16_t, kDCTBlockSize);
constexpr D d;
constexpr DI di;
constexpr DI16 di16;
constexpr DI16_FULL di16_full;

// TODO(veluca): consider SIMDfying.
void Transpose8x8InPlace(int32_t* JXL_RESTRICT block) {
  for (size_t x = 0; x < 8; x++) {
    for (size_t y = x + 1; y < 8; y++) {
      std::swap(block[y * 8 + x], block[x * 8 + y]);
    }
  }
}

template <ACType ac_type>
void DequantLane(Vec<D> scaled_dequant_x, Vec<D> scaled_dequant_y,
                 Vec<D> scaled_dequant_b,
                 const float* JXL_RESTRICT dequant_matrices, size_t size,
                 size_t k, Vec<D> x_cc_mul, Vec<D> b_cc_mul,
                 const float* JXL_RESTRICT biases, ACPtr qblock[3],
                 float* JXL_RESTRICT block) {
  const auto x_mul = Mul(Load(d, dequant_matrices + k), scaled_dequant_x);
  const auto y_mul =
      Mul(Load(d, dequant_matrices + size + k), scaled_dequant_y);
  const auto b_mul =
      Mul(Load(d, dequant_matrices + 2 * size + k), scaled_dequant_b);

  Vec<DI> quantized_x_int;
  Vec<DI> quantized_y_int;
  Vec<DI> quantized_b_int;
  if (ac_type == ACType::k16) {
    Rebind<int16_t, DI> di16;
    quantized_x_int = PromoteTo(di, Load(di16, qblock[0].ptr16 + k));
    quantized_y_int = PromoteTo(di, Load(di16, qblock[1].ptr16 + k));
    quantized_b_int = PromoteTo(di, Load(di16, qblock[2].ptr16 + k));
  } else {
    quantized_x_int = Load(di, qblock[0].ptr32 + k);
    quantized_y_int = Load(di, qblock[1].ptr32 + k);
    quantized_b_int = Load(di, qblock[2].ptr32 + k);
  }

  const auto dequant_x_cc =
      Mul(AdjustQuantBias(di, 0, quantized_x_int, biases), x_mul);
  const auto dequant_y =
      Mul(AdjustQuantBias(di, 1, quantized_y_int, biases), y_mul);
  const auto dequant_b_cc =
      Mul(AdjustQuantBias(di, 2, quantized_b_int, biases), b_mul);

  const auto dequant_x = MulAdd(x_cc_mul, dequant_y, dequant_x_cc);
  const auto dequant_b = MulAdd(b_cc_mul, dequant_y, dequant_b_cc);
  Store(dequant_x, d, block + k);
  Store(dequant_y, d, block + size + k);
  Store(dequant_b, d, block + 2 * size + k);
}

template <ACType ac_type>
void DequantBlock(const AcStrategy& acs, float inv_global_scale, int quant,
                  float x_dm_multiplier, float b_dm_multiplier, Vec<D> x_cc_mul,
                  Vec<D> b_cc_mul, AcStrategyType kind, size_t size,
                  const Quantizer& quantizer, size_t covered_blocks,
                  const size_t* sbx,
                  const float* JXL_RESTRICT* JXL_RESTRICT dc_row,
                  size_t dc_stride, const float* JXL_RESTRICT biases,
                  ACPtr qblock[3], float* JXL_RESTRICT block,
                  float* JXL_RESTRICT scratch) {
  const auto scaled_dequant_s = inv_global_scale / quant;

  const auto scaled_dequant_x = Set(d, scaled_dequant_s * x_dm_multiplier);
  const auto scaled_dequant_y = Set(d, scaled_dequant_s);
  const auto scaled_dequant_b = Set(d, scaled_dequant_s * b_dm_multiplier);

  const float* dequant_matrices = quantizer.DequantMatrix(kind, 0);

  for (size_t k = 0; k < covered_blocks * kDCTBlockSize; k += Lanes(d)) {
    DequantLane<ac_type>(scaled_dequant_x, scaled_dequant_y, scaled_dequant_b,
                         dequant_matrices, size, k, x_cc_mul, b_cc_mul, biases,
                         qblock, block);
  }
  for (size_t c = 0; c < 3; c++) {
    LowestFrequenciesFromDC(acs.Strategy(), dc_row[c] + sbx[c], dc_stride,
                            block + c * size, scratch);
  }
}

Status DecodeGroupImpl(const FrameHeader& frame_header,
                       GetBlock* JXL_RESTRICT get_block,
                       GroupDecCache* JXL_RESTRICT group_dec_cache,
                       PassesDecoderState* JXL_RESTRICT dec_state,
                       size_t thread, size_t group_idx,
                       RenderPipelineInput& render_pipeline_input,
                       jpeg::JPEGData* jpeg_data, DrawMode draw) {
  // TODO(veluca): investigate cache usage in this function.
  const Rect block_rect =
      dec_state->shared->frame_dim.BlockGroupRect(group_idx);
  const AcStrategyImage& ac_strategy = dec_state->shared->ac_strategy;

  const size_t xsize_blocks = block_rect.xsize();
  const size_t ysize_blocks = block_rect.ysize();

  const size_t dc_stride = dec_state->shared->dc->PixelsPerRow();

  const float inv_global_scale = dec_state->shared->quantizer.InvGlobalScale();

  const YCbCrChromaSubsampling& cs = frame_header.chroma_subsampling;

  const auto kJpegDctMin = Set(di16_full, -4095);
  const auto kJpegDctMax = Set(di16_full, 4095);

  size_t idct_stride[3];
  for (size_t c = 0; c < 3; c++) {
    idct_stride[c] = render_pipeline_input.GetBuffer(c).first->PixelsPerRow();
  }

  HWY_ALIGN int32_t scaled_qtable[64 * 3];

  ACType ac_type = dec_state->coefficients->Type();
  auto dequant_block = ac_type == ACType::k16 ? DequantBlock<ACType::k16>
                                              : DequantBlock<ACType::k32>;
  // Whether or not coefficients should be stored for future usage, and/or read
  // from past usage.
  bool accumulate = !dec_state->coefficients->IsEmpty();
  // Offset of the current block in the group.
  size_t offset = 0;

  std::array<int, 3> jpeg_c_map;
  bool jpeg_is_gray = false;
  std::array<int, 3> dcoff = {};

  // TODO(veluca): all of this should be done only once per image.
  const ColorCorrelation& color_correlation = dec_state->shared->cmap.base();
  if (jpeg_data) {
    if (!color_correlation.IsJPEGCompatible()) {
      return JXL_FAILURE("The CfL map is not JPEG-compatible");
    }
    jpeg_is_gray = (jpeg_data->components.size() == 1);
    JXL_ENSURE(frame_header.color_transform != ColorTransform::kXYB);
    jpeg_c_map = JpegOrder(frame_header.color_transform, jpeg_is_gray);
    const std::vector<QuantEncoding>& qe =
        dec_state->shared->matrices.encodings();
    if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
        std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
      return JXL_FAILURE(
          "Quantization table is not a JPEG quantization table.");
    }
    JXL_ENSURE(qe[0].qraw.qtable->size() == 3 * 8 * 8);
    int* qtable = qe[0].qraw.qtable->data();
    for (size_t c = 0; c < 3; c++) {
      if (frame_header.color_transform == ColorTransform::kNone) {
        dcoff[c] = 1024 / qtable[64 * c];
      }
      for (size_t i = 0; i < 64; i++) {
        // Transpose the matrix, as it will be used on the transposed block.
        int n = qtable[64 + i];
        int d = qtable[64 * c + i];
        if (n <= 0 || d <= 0 || n >= 65536 || d >= 65536) {
          return JXL_FAILURE("Invalid JPEG quantization table");
        }
        scaled_qtable[64 * c + (i % 8) * 8 + (i / 8)] =
            (1 << kCFLFixedPointPrecision) * n / d;
      }
    }
  }

  size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)};
  size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)};
  Rect r[3];
  for (size_t i = 0; i < 3; i++) {
    r[i] =
        Rect(block_rect.x0() >> hshift[i], block_rect.y0() >> vshift[i],
             block_rect.xsize() >> hshift[i], block_rect.ysize() >> vshift[i]);
    if (!r[i].IsInside({0, 0, dec_state->shared->dc->Plane(i).xsize(),
                        dec_state->shared->dc->Plane(i).ysize()})) {
      return JXL_FAILURE("Frame dimensions are too big for the image.");
    }
  }

  for (size_t by = 0; by < ysize_blocks; ++by) {
    get_block->StartRow(by);
    size_t sby[3] = {by >> vshift[0], by >> vshift[1], by >> vshift[2]};

    const int32_t* JXL_RESTRICT row_quant =
        block_rect.ConstRow(dec_state->shared->raw_quant_field, by);

    const float* JXL_RESTRICT dc_rows[3] = {
        r[0].ConstPlaneRow(*dec_state->shared->dc, 0, sby[0]),
        r[1].ConstPlaneRow(*dec_state->shared->dc, 1, sby[1]),
        r[2].ConstPlaneRow(*dec_state->shared->dc, 2, sby[2]),
    };

    const size_t ty = (block_rect.y0() + by) / kColorTileDimInBlocks;
    AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);

    const int8_t* JXL_RESTRICT row_cmap[3] = {
        dec_state->shared->cmap.ytox_map.ConstRow(ty),
        nullptr,
        dec_state->shared->cmap.ytob_map.ConstRow(ty),
    };

    float* JXL_RESTRICT idct_row[3];
    int16_t* JXL_RESTRICT jpeg_row[3];
    for (size_t c = 0; c < 3; c++) {
      const auto& buffer = render_pipeline_input.GetBuffer(c);
      idct_row[c] = buffer.second.Row(buffer.first, sby[c] * kBlockDim);
      if (jpeg_data) {
        auto& component = jpeg_data->components[jpeg_c_map[c]];
        jpeg_row[c] =
            component.coeffs.data() +
            (component.width_in_blocks * (r[c].y0() + sby[c]) + r[c].x0()) *
                kDCTBlockSize;
      }
    }

    size_t bx = 0;
    for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
         tx++) {
      size_t abs_tx = tx + block_rect.x0() / kColorTileDimInBlocks;
      auto x_cc_mul = Set(d, color_correlation.YtoXRatio(row_cmap[0][abs_tx]));
      auto b_cc_mul = Set(d, color_correlation.YtoBRatio(row_cmap[2][abs_tx]));
      // Increment bx by llf_x because those iterations would otherwise
      // immediately continue (!IsFirstBlock). Reduces mispredictions.
      for (; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;) {
        size_t sbx[3] = {bx >> hshift[0], bx >> hshift[1], bx >> hshift[2]};
        AcStrategy acs = acs_row[bx];
        const size_t llf_x = acs.covered_blocks_x();

        // Can only happen in the second or lower rows of a varblock.
        if (JXL_UNLIKELY(!acs.IsFirstBlock())) {
          bx += llf_x;
          continue;
        }
        const size_t log2_covered_blocks = acs.log2_covered_blocks();

        const size_t covered_blocks = 1 << log2_covered_blocks;
        const size_t size = covered_blocks * kDCTBlockSize;

        ACPtr qblock[3];
        if (accumulate) {
          for (size_t c = 0; c < 3; c++) {
            qblock[c] = dec_state->coefficients->PlaneRow(c, group_idx, offset);
          }
        } else {
          // No point in reading from bitstream without accumulating and not
          // drawing.
          JXL_ENSURE(draw == kDraw);
          if (ac_type == ACType::k16) {
            memset(group_dec_cache->dec_group_qblock16, 0,
                   size * 3 * sizeof(int16_t));
            for (size_t c = 0; c < 3; c++) {
              qblock[c].ptr16 = group_dec_cache->dec_group_qblock16 + c * size;
            }
          } else {
            memset(group_dec_cache->dec_group_qblock, 0,
                   size * 3 * sizeof(int32_t));
            for (size_t c = 0; c < 3; c++) {
              qblock[c].ptr32 = group_dec_cache->dec_group_qblock + c * size;
            }
          }
        }
        JXL_RETURN_IF_ERROR(get_block->LoadBlock(
            bx, by, acs, size, log2_covered_blocks, qblock, ac_type));
        offset += size;
        if (draw == kDontDraw) {
          bx += llf_x;
          continue;
        }

        if (JXL_UNLIKELY(jpeg_data)) {
          if (acs.Strategy() != AcStrategyType::DCT) {
            return JXL_FAILURE(
                "Can only decode to JPEG if only DCT-8 is used.");
          }

          HWY_ALIGN int32_t transposed_dct_y[64];
          for (size_t c : {1, 0, 2}) {
            // Propagate only Y for grayscale.
            if (jpeg_is_gray && c != 1) {
              continue;
            }
            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
              continue;
            }
            int16_t* JXL_RESTRICT jpeg_pos =
                jpeg_row[c] + sbx[c] * kDCTBlockSize;
            // JPEG XL is transposed, JPEG is not.
            auto* transposed_dct = qblock[c].ptr32;
            Transpose8x8InPlace(transposed_dct);
            // No CfL - no need to store the y block converted to integers.
            if (!cs.Is444() ||
                (row_cmap[0][abs_tx] == 0 && row_cmap[2][abs_tx] == 0)) {
              for (size_t i = 0; i < 64; i += Lanes(d)) {
                const auto ini = Load(di, transposed_dct + i);
                const auto ini16 = DemoteTo(di16, ini);
                StoreU(ini16, di16, jpeg_pos + i);
              }
            } else if (c == 1) {
              // Y channel: save for restoring X/B, but nothing else to do.
              for (size_t i = 0; i < 64; i += Lanes(d)) {
                const auto ini = Load(di, transposed_dct + i);
                Store(ini, di, transposed_dct_y + i);
                const auto ini16 = DemoteTo(di16, ini);
                StoreU(ini16, di16, jpeg_pos + i);
              }
            } else {
              // transposed_dct_y contains the y channel block, transposed.
              const auto scale =
                  Set(di, ColorCorrelation::RatioJPEG(row_cmap[c][abs_tx]));
              const auto round = Set(di, 1 << (kCFLFixedPointPrecision - 1));
              for (int i = 0; i < 64; i += Lanes(d)) {
                auto in = Load(di, transposed_dct + i);
                auto in_y = Load(di, transposed_dct_y + i);
                auto qt = Load(di, scaled_qtable + c * size + i);
                auto coeff_scale = ShiftRight<kCFLFixedPointPrecision>(
                    Add(Mul(qt, scale), round));
                auto cfl_factor = ShiftRight<kCFLFixedPointPrecision>(
                    Add(Mul(in_y, coeff_scale), round));
                StoreU(DemoteTo(di16, Add(in, cfl_factor)), di16, jpeg_pos + i);
              }
            }
            jpeg_pos[0] =
                Clamp1<float>(dc_rows[c][sbx[c]] - dcoff[c], -2047, 2047);
            auto overflow = MaskFromVec(Set(di16_full, 0));
            auto underflow = MaskFromVec(Set(di16_full, 0));
            for (int i = 0; i < 64; i += Lanes(di16_full)) {
              auto in = LoadU(di16_full, jpeg_pos + i);
              overflow = Or(overflow, Gt(in, kJpegDctMax));
              underflow = Or(underflow, Lt(in, kJpegDctMin));
            }
            if (!AllFalse(di16_full, Or(overflow, underflow))) {
              return JXL_FAILURE("JPEG DCT coefficients out of range");
            }
          }
        } else {
          HWY_ALIGN floatconst block = group_dec_cache->dec_group_block;
          // Dequantize and add predictions.
          dequant_block(
              acs, inv_global_scale, row_quant[bx], dec_state->x_dm_multiplier,
              dec_state->b_dm_multiplier, x_cc_mul, b_cc_mul, acs.Strategy(),
              size, dec_state->shared->quantizer,
              acs.covered_blocks_y() * acs.covered_blocks_x(), sbx, dc_rows,
              dc_stride,
              dec_state->output_encoding_info.opsin_params.quant_biases, qblock,
              block, group_dec_cache->scratch_space);

          for (size_t c : {1, 0, 2}) {
            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
              continue;
            }
            // IDCT
            float* JXL_RESTRICT idct_pos = idct_row[c] + sbx[c] * kBlockDim;
            TransformToPixels(acs.Strategy(), block + c * size, idct_pos,
                              idct_stride[c], group_dec_cache->scratch_space);
          }
        }
        bx += llf_x;
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
namespace {
// Decode quantized AC coefficients of DCT blocks.
// LLF components in the output block will not be modified.
template <ACType ac_type, bool uses_lz77>
Status DecodeACVarBlock(size_t ctx_offset, size_t log2_covered_blocks,
                        int32_t* JXL_RESTRICT row_nzeros,
                        const int32_t* JXL_RESTRICT row_nzeros_top,
                        size_t nzeros_stride, size_t c, size_t bx, size_t by,
                        size_t lbx, AcStrategy acs,
                        const coeff_order_t* JXL_RESTRICT coeff_order,
                        BitReader* JXL_RESTRICT br,
                        ANSSymbolReader* JXL_RESTRICT decoder,
                        const std::vector<uint8_t>& context_map,
                        const uint8_t* qdc_row, const int32_t* qf_row,
                        const BlockCtxMap& block_ctx_map, ACPtr block,
                        size_t shift = 0) {
  // Equal to number of LLF coefficients.
  const size_t covered_blocks = 1 << log2_covered_blocks;
  const size_t size = covered_blocks * kDCTBlockSize;
  int32_t predicted_nzeros =
      PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);

  size_t ord = kStrategyOrder[acs.RawStrategy()];
  const coeff_order_t* JXL_RESTRICT order =
      &coeff_order[CoeffOrderOffset(ord, c)];

  size_t block_ctx = block_ctx_map.Context(qdc_row[lbx], qf_row[bx], ord, c);
  const int32_t nzero_ctx =
      block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx) + ctx_offset;

  size_t nzeros =
      decoder->ReadHybridUintInlined<uses_lz77>(nzero_ctx, br, context_map);
  if (nzeros > size - covered_blocks) {
    return JXL_FAILURE("Invalid AC: nzeros %" PRIuS " too large for %" PRIuS
                       " 8x8 blocks",
                       nzeros, covered_blocks);
  }
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
      row_nzeros[bx + x + y * nzeros_stride] =
          (nzeros + covered_blocks - 1) >> log2_covered_blocks;
    }
  }

  const size_t histo_offset =
      ctx_offset + block_ctx_map.ZeroDensityContextsOffset(block_ctx);

  size_t prev = (nzeros > size / 16 ? 0 : 1);
  for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
    const size_t ctx =
        histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
                                          log2_covered_blocks, prev);
    const size_t u_coeff =
        decoder->ReadHybridUintInlined<uses_lz77>(ctx, br, context_map);
    // Hand-rolled version of UnpackSigned, shifting before the conversion to
    // signed integer to avoid undefined behavior of shifting negative numbers.
    const size_t magnitude = u_coeff >> 1;
    const size_t neg_sign = (~u_coeff) & 1;
    const intptr_t coeff =
        static_cast<intptr_t>((magnitude ^ (neg_sign - 1)) << shift);
    if (ac_type == ACType::k16) {
      block.ptr16[order[k]] += coeff;
    } else {
      block.ptr32[order[k]] += coeff;
    }
    prev = static_cast<size_t>(u_coeff != 0);
    nzeros -= prev;
  }
  if (JXL_UNLIKELY(nzeros != 0)) {
    return JXL_FAILURE("Invalid AC: nzeros at end of block is %" PRIuS
                       ", should be 0. Block (%" PRIuS ", %" PRIuS
                       "), channel %" PRIuS,
                       nzeros, bx, by, c);
  }

  return true;
}

// Structs used by DecodeGroupImpl to get a quantized block.
// GetBlockFromBitstream uses ANS decoding (and thus keeps track of row
// pointers in row_nzeros), GetBlockFromEncoder simply reads the coefficient
// image provided by the encoder.

struct GetBlockFromBitstream : public GetBlock {
  void StartRow(size_t by) override {
    qf_row = rect.ConstRow(*qf, by);
    for (size_t c = 0; c < 3; c++) {
      size_t sby = by >> vshift[c];
      quant_dc_row = quant_dc->ConstRow(rect.y0() + by) + rect.x0();
      for (size_t i = 0; i < num_passes; i++) {
        row_nzeros[i][c] = group_dec_cache->num_nzeroes[i].PlaneRow(c, sby);
        row_nzeros_top[i][c] =
            sby == 0
                ? nullptr
                : group_dec_cache->num_nzeroes[i].ConstPlaneRow(c, sby - 1);
      }
    }
  }

  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
                   size_t log2_covered_blocks, ACPtr block[3],
                   ACType ac_type) override {
    ;
    for (size_t c : {1, 0, 2}) {
      size_t sbx = bx >> hshift[c];
      size_t sby = by >> vshift[c];
      if (JXL_UNLIKELY((sbx << hshift[c] != bx) || (sby << vshift[c] != by))) {
        continue;
      }

      for (size_t pass = 0; JXL_UNLIKELY(pass < num_passes); pass++) {
        auto decode_ac_varblock =
            decoders[pass].UsesLZ77()
                ? (ac_type == ACType::k16 ? DecodeACVarBlock<ACType::k16, 1>
                                          : DecodeACVarBlock<ACType::k32, 1>)
                : (ac_type == ACType::k16 ? DecodeACVarBlock<ACType::k16, 0>
                                          : DecodeACVarBlock<ACType::k32, 0>);
        JXL_RETURN_IF_ERROR(decode_ac_varblock(
            ctx_offset[pass], log2_covered_blocks, row_nzeros[pass][c],
            row_nzeros_top[pass][c], nzeros_stride, c, sbx, sby, bx, acs,
            &coeff_orders[pass * coeff_order_size], readers[pass],
            &decoders[pass], context_map[pass], quant_dc_row, qf_row,
            *block_ctx_map, block[c], shift_for_pass[pass]));
      }
    }
    return true;
  }

  Status Init(const FrameHeader& frame_header,
              BitReader* JXL_RESTRICT* JXL_RESTRICT readers, size_t num_passes,
              size_t group_idx, size_t histo_selector_bits, const Rect& rect,
              GroupDecCache* JXL_RESTRICT group_dec_cache,
              PassesDecoderState* dec_state, size_t first_pass) {
    for (size_t i = 0; i < 3; i++) {
      hshift[i] = frame_header.chroma_subsampling.HShift(i);
      vshift[i] = frame_header.chroma_subsampling.VShift(i);
    }
    this->coeff_order_size = dec_state->shared->coeff_order_size;
    this->coeff_orders =
        dec_state->shared->coeff_orders.data() + first_pass * coeff_order_size;
    this->context_map = dec_state->context_map.data() + first_pass;
    this->readers = readers;
    this->num_passes = num_passes;
    this->shift_for_pass = frame_header.passes.shift + first_pass;
    this->group_dec_cache = group_dec_cache;
    this->rect = rect;
    block_ctx_map = &dec_state->shared->block_ctx_map;
    qf = &dec_state->shared->raw_quant_field;
    quant_dc = &dec_state->shared->quant_dc;

    for (size_t pass = 0; pass < num_passes; pass++) {
      // Select which histogram set to use among those of the current pass.
      size_t cur_histogram = 0;
      if (histo_selector_bits != 0) {
        cur_histogram = readers[pass]->ReadBits(histo_selector_bits);
      }
      if (cur_histogram >= dec_state->shared->num_histograms) {
        return JXL_FAILURE("Invalid histogram selector");
      }
      ctx_offset[pass] = cur_histogram * block_ctx_map->NumACContexts();

      JXL_ASSIGN_OR_RETURN(
          decoders[pass],
          ANSSymbolReader::Create(&dec_state->code[pass + first_pass],
                                  readers[pass]));
    }
    nzeros_stride = group_dec_cache->num_nzeroes[0].PixelsPerRow();
    for (size_t i = 0; i < num_passes; i++) {
      JXL_ENSURE(
          nzeros_stride ==
          static_cast<size_t>(group_dec_cache->num_nzeroes[i].PixelsPerRow()));
    }
    return true;
  }

  const uint32_t* shift_for_pass = nullptr;  // not owned
  const coeff_order_t* JXL_RESTRICT coeff_orders;
  size_t coeff_order_size;
  const std::vector<uint8_t>* JXL_RESTRICT context_map;
  ANSSymbolReader decoders[kMaxNumPasses];
  BitReader* JXL_RESTRICT* JXL_RESTRICT readers;
  size_t num_passes;
  size_t ctx_offset[kMaxNumPasses];
  size_t nzeros_stride;
  int32_t* JXL_RESTRICT row_nzeros[kMaxNumPasses][3];
  const int32_t* JXL_RESTRICT row_nzeros_top[kMaxNumPasses][3];
  GroupDecCache* JXL_RESTRICT group_dec_cache;
  const BlockCtxMap* block_ctx_map;
  const ImageI* qf;
  const ImageB* quant_dc;
  const int32_t* qf_row;
  const uint8_t* quant_dc_row;
  Rect rect;
  size_t hshift[3], vshift[3];
};

struct GetBlockFromEncoder : public GetBlock {
  void StartRow(size_t by) override {}

  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
                   size_t log2_covered_blocks, ACPtr block[3],
                   ACType ac_type) override {
    JXL_ENSURE(ac_type == ACType::k32);
    for (size_t c = 0; c < 3; c++) {
      // for each pass
      for (size_t i = 0; i < quantized_ac->size(); i++) {
        for (size_t k = 0; k < size; k++) {
          // TODO(veluca): SIMD.
          block[c].ptr32[k] +=
              rows[i][c][offset + k] * (1 << shift_for_pass[i]);
        }
      }
    }
    offset += size;
    return true;
  }

  static StatusOr<GetBlockFromEncoder> Create(
      const std::vector<std::unique_ptr<ACImage>>& ac, size_t group_idx,
      const uint32_t* shift_for_pass) {
    GetBlockFromEncoder result(ac, group_idx, shift_for_pass);
    // TODO(veluca): not supported with chroma subsampling.
    for (size_t i = 0; i < ac.size(); i++) {
      JXL_ENSURE(ac[i]->Type() == ACType::k32);
      for (size_t c = 0; c < 3; c++) {
        result.rows[i][c] = ac[i]->PlaneRow(c, group_idx, 0).ptr32;
      }
    }
    return result;
  }

  const std::vector<std::unique_ptr<ACImage>>* JXL_RESTRICT quantized_ac;
  size_t offset = 0;
  const int32_t* JXL_RESTRICT rows[kMaxNumPasses][3];
  const uint32_t* shift_for_pass = nullptr;  // not owned

 private:
  GetBlockFromEncoder(const std::vector<std::unique_ptr<ACImage>>& ac,
                      size_t group_idx, const uint32_t* shift_for_pass)
      : quantized_ac(&ac), shift_for_pass(shift_for_pass) {}
};

HWY_EXPORT(DecodeGroupImpl);

}  // namespace

Status DecodeGroup(const FrameHeader& frame_header,
                   BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
                   size_t num_passes, size_t group_idx,
                   PassesDecoderState* JXL_RESTRICT dec_state,
                   GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
                   RenderPipelineInput& render_pipeline_input,
                   jpeg::JPEGData* JXL_RESTRICT jpeg_data, size_t first_pass,
                   bool force_draw, bool dc_only, bool* should_run_pipeline) {
  JxlMemoryManager* memory_manager = dec_state->memory_manager();
  DrawMode draw =
      (num_passes + first_pass == frame_header.passes.num_passes) || force_draw
          ? kDraw
          : kDontDraw;

  if (should_run_pipeline) {
    *should_run_pipeline = draw != kDontDraw;
  }

  if (draw == kDraw && num_passes == 0 && first_pass == 0) {
    JXL_RETURN_IF_ERROR(group_dec_cache->InitDCBufferOnce(memory_manager));
    const YCbCrChromaSubsampling& cs = frame_header.chroma_subsampling;
    for (size_t c : {0, 1, 2}) {
      size_t hs = cs.HShift(c);
      size_t vs = cs.VShift(c);
      // We reuse filter_input_storage here as it is not currently in use.
      const Rect src_rect_precs =
          dec_state->shared->frame_dim.BlockGroupRect(group_idx);
      const Rect src_rect =
          Rect(src_rect_precs.x0() >> hs, src_rect_precs.y0() >> vs,
               src_rect_precs.xsize() >> hs, src_rect_precs.ysize() >> vs);
      const Rect copy_rect(kRenderPipelineXOffset, 2, src_rect.xsize(),
                           src_rect.ysize());
      JXL_RETURN_IF_ERROR(
          CopyImageToWithPadding(src_rect, dec_state->shared->dc->Plane(c), 2,
                                 copy_rect, &group_dec_cache->dc_buffer));
      // Mirrorpad. Interleaving left and right padding ensures that padding
      // works out correctly even for images with DC size of 1.
      for (size_t y = 0; y < src_rect.ysize() + 4; y++) {
        size_t xend = kRenderPipelineXOffset +
                      (dec_state->shared->dc->Plane(c).xsize() >> hs) -
                      src_rect.x0();
        for (size_t ix = 0; ix < 2; ix++) {
          if (src_rect.x0() == 0) {
            group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset - ix - 1] =
                group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset + ix];
          }
          if (src_rect.x0() + src_rect.xsize() + 2 >=
              (dec_state->shared->dc->xsize() >> hs)) {
            group_dec_cache->dc_buffer.Row(y)[xend + ix] =
                group_dec_cache->dc_buffer.Row(y)[xend - ix - 1];
          }
        }
      }
      const auto& buffer = render_pipeline_input.GetBuffer(c);
      Rect dst_rect = buffer.second;
      ImageF* upsampling_dst = buffer.first;
      JXL_ENSURE(dst_rect.IsInside(*upsampling_dst));

      RenderPipelineStage::RowInfo input_rows(1, std::vector<float*>(5));
      RenderPipelineStage::RowInfo output_rows(1, std::vector<float*>(8));
      for (size_t y = src_rect.y0(); y < src_rect.y0() + src_rect.ysize();
           y++) {
        for (ssize_t iy = 0; iy < 5; iy++) {
          input_rows[0][iy] = group_dec_cache->dc_buffer.Row(
              Mirror(static_cast<ssize_t>(y) + iy - 2,
                     dec_state->shared->dc->Plane(c).ysize() >> vs) +
              2 - src_rect.y0());
        }
        for (size_t iy = 0; iy < 8; iy++) {
          output_rows[0][iy] =
              dst_rect.Row(upsampling_dst, ((y - src_rect.y0()) << 3) + iy) -
              kRenderPipelineXOffset;
        }
        // Arguments set to 0/nullptr are not used.
        JXL_RETURN_IF_ERROR(dec_state->upsampler8x->ProcessRow(
            input_rows, output_rows,
            /*xextra=*/0, src_rect.xsize(), 0, 0, thread));
      }
    }
    return true;
  }

  size_t histo_selector_bits = 0;
  if (dc_only) {
    JXL_ENSURE(num_passes == 0);
  } else {
    JXL_ENSURE(dec_state->shared->num_histograms > 0);
    histo_selector_bits = CeilLog2Nonzero(dec_state->shared->num_histograms);
  }

  auto get_block = jxl::make_unique<GetBlockFromBitstream>();
  JXL_RETURN_IF_ERROR(get_block->Init(
      frame_header, readers, num_passes, group_idx, histo_selector_bits,
      dec_state->shared->frame_dim.BlockGroupRect(group_idx), group_dec_cache,
      dec_state, first_pass));

  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
      frame_header, get_block.get(), group_dec_cache, dec_state, thread,
      group_idx, render_pipeline_input, jpeg_data, draw));

  for (size_t pass = 0; pass < num_passes; pass++) {
    if (!get_block->decoders[pass].CheckANSFinalState()) {
      return JXL_FAILURE("ANS checksum failure.");
    }
  }
  return true;
}

Status DecodeGroupForRoundtrip(const FrameHeader& frame_header,
                               const std::vector<std::unique_ptr<ACImage>>& ac,
                               size_t group_idx,
                               PassesDecoderState* JXL_RESTRICT dec_state,
                               GroupDecCache* JXL_RESTRICT group_dec_cache,
                               size_t thread,
                               RenderPipelineInput& render_pipeline_input,
                               jpeg::JPEGData* JXL_RESTRICT jpeg_data,
                               AuxOut* aux_out) {
  JxlMemoryManager* memory_manager = dec_state->memory_manager();
  JXL_ASSIGN_OR_RETURN(
      GetBlockFromEncoder get_block,
      GetBlockFromEncoder::Create(ac, group_idx, frame_header.passes.shift));
  JXL_RETURN_IF_ERROR(group_dec_cache->InitOnce(
      memory_manager,
      /*num_passes=*/0,
      /*used_acs=*/(1u << AcStrategy::kNumValidStrategies) - 1));

  return HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
      frame_header, &get_block, group_dec_cache, dec_state, thread, group_idx,
      render_pipeline_input, jpeg_data, kDraw);
}

}  // namespace jxl
#endif  // HWY_ONCE

Messung V0.5
C=94 H=94 G=93

¤ Dauer der Verarbeitung: 0.8 Sekunden  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.






                                                                                                                                                                                                                                                                                                                                                                                                     


Neuigkeiten

     Aktuelles
     Motto des Tages

Software

     Produkte
     Quellcodebibliothek

Aktivitäten

     Artikel über Sicherheit
     Anleitung zur Aktivierung von SSL

Muße

     Gedichte
     Musik
     Bilder

Jenseits des Üblichen ....
    

Besucherstatistik

Besucherstatistik

Monitoring

Montastic status badge