Quelle rescaler_mips_dsp_r2.c Sprache: C

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of rescaling functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)

#include "src/dsp/dsp.h"

#if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)

#include <assert.h>
#include "src/utils/rescaler_utils.h"

// Fixed-point helpers used by the scalar tail loops below.
// ROUNDER is half of WEBP_RESCALER_ONE, i.e. the bias that turns the
// truncating right shift in MULT_FIX into round-to-nearest.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// Rounded fixed-point product: (x * y + ROUNDER) >> WEBP_RESCALER_RFIX,
// evaluated in 64 bits so the intermediate product is not truncated.
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
// Truncating (floor) variant of MULT_FIX: no rounding bias is added.
// In this file it is only referenced by the disabled (#if 0) shrink exporter.
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)

//------------------------------------------------------------------------------
// Row export

#if 0  // disabled for now. TODO(skal): make match the C-code
// Exports one output row for the vertical-shrink case (asserts !y_expand).
// Reads the accumulators wrk->irow (and wrk->frow when yscale != 0), writes
// x_out_max = dst_width * num_channels bytes to wrk->dst and updates irow in
// place. Samples are handled four at a time in inline assembly; the remaining
// (x_out_max & 3) samples are handled by a scalar C loop.
//
// NOTE(review): this function is compiled out (#if 0) and is not installed by
// the init function. Within this file the assembly and the scalar tail loops
// visibly disagree (rounding vs. floor, no clamping in the assembly); this is
// presumably related to the TODO above about matching the C code -- confirm
// before re-enabling.
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const rescaler_t* frow = wrk->frow;
  const int yscale = wrk->fy_scale * (-wrk->y_accum);
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // temp7: final scaling factor applied to the accumulated values.
  const int temp7 = (int)wrk->fxy_scale;
  // temp6: byte length of the part of the row processed four samples at a
  // time (sample count rounded down to a multiple of 4, times 4 bytes per
  // sample as implied by the 'lw' offsets below).
  const int temp6 = (x_out_max & ~0x3) << 2;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
  assert(wrk->fxy_scale != 0);
  if (yscale) {
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp3 * temp4 = 0x10000 * 0x8000 = 2^31: each 'mult' below preloads
        // an accumulator with this rounding bias before 'maddu' adds the
        // product; 'mfhi' then returns bits 63..32 of the sum.
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
      "1: \n\t"
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp5], 12(%[frow]) \n\t"
        // frac = hi32(2^31 + frow[k] * yscale).
        // NOTE(review): the scalar tail uses MULT_FIX_FLOOR (no bias) here.
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[yscale] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[yscale] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[yscale] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[yscale] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        "lw %[temp8], 0(%[irow]) \n\t"
        "lw %[temp9], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        // Pointers are advanced early; the stores below use negative offsets.
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // irow[k] - frac
        "subu %[temp8], %[temp8], %[temp0] \n\t"
        "subu %[temp9], %[temp9], %[temp1] \n\t"
        "subu %[temp10], %[temp10], %[temp2] \n\t"
        "subu %[temp11], %[temp11], %[temp5] \n\t"
        // v = hi32(2^31 + (irow[k] - frac) * fxy_scale)
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp8], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp9], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp10], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp11], %[temp7] \n\t"
        "mfhi %[temp8], $ac0 \n\t"
        "mfhi %[temp9], $ac1 \n\t"
        "mfhi %[temp10], $ac2 \n\t"
        "mfhi %[temp11], $ac3 \n\t"
        // irow[k] = frac (new fractional start)
        "sw %[temp0], -16(%[irow]) \n\t"
        "sw %[temp1], -12(%[irow]) \n\t"
        "sw %[temp2], -8(%[irow]) \n\t"
        "sw %[temp5], -4(%[irow]) \n\t"
        // NOTE(review): 'sb' keeps only the low byte of v; unlike the scalar
        // tail there is no clamp to 255.
        "sb %[temp8], -4(%[dst]) \n\t"
        "sb %[temp9], -3(%[dst]) \n\t"
        "sb %[temp10], -2(%[dst]) \n\t"
        "sb %[temp11], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: remaining (x_out_max & 3) samples. frow/irow/dst were
    // advanced by the assembly above when it ran.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = frac;   // new fractional start
    }
  } else {
    // yscale == 0: no fractional carry-over; irow is scaled and then reset.
    if (x_out_max >= 4) {
      __asm__ volatile (
        // Same 2^31 rounding-bias preload as in the branch above.
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        "addu %[loop_end], %[irow], %[temp6] \n\t"
      "1: \n\t"
        "lw %[temp0], 0(%[irow]) \n\t"
        "lw %[temp1], 4(%[irow]) \n\t"
        "lw %[temp2], 8(%[irow]) \n\t"
        "lw %[temp5], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // v = hi32(2^31 + irow[k] * fxy_scale).
        // NOTE(review): the scalar tail uses MULT_FIX_FLOOR (no bias) here.
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        // irow[k] = 0
        "sw $zero, -16(%[irow]) \n\t"
        "sw $zero, -12(%[irow]) \n\t"
        "sw $zero, -8(%[irow]) \n\t"
        "sw $zero, -4(%[irow]) \n\t"
        // NOTE(review): low byte only, no clamp to 255 (the tail clamps).
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp5], -1(%[dst]) \n\t"
        "bne %[irow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: remaining (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = 0;
    }
  }
}
#endif  // 0

// Exports one output row for the vertical-expand case (asserts y_expand).
//
// Writes x_out_max = dst_width * num_channels bytes to wrk->dst:
//  - y_accum == 0: dst[k] = clamp(MULT_FIX(frow[k], fy_scale))
//  - otherwise:    J      = (A * frow[k] + B * irow[k] + ROUNDER) >> RFIX
//                  dst[k] = clamp(MULT_FIX(J, fy_scale))
//    with B = WEBP_RESCALER_FRAC(-y_accum, y_sub), A = WEBP_RESCALER_ONE - B.
// clamp(v) is '(v > 255) ? 255 : (uint8_t)v'. frow and irow are only read.
//
// Samples are handled four at a time in inline assembly (one DSP accumulator
// per sample); the remaining (x_out_max & 3) samples go through a scalar C
// loop. The assembly applies the same clamp as the scalar loop: a bare 'sb'
// would keep only the low byte of v, so a value above 255 would wrap instead
// of saturating and the 4-wide part would disagree with the tail.
//
// The assembly preloads each accumulator with 0x10000 * 0x8000 = 2^31 and
// takes the high 32 bits of the sum via 'mfhi'; this equals MULT_FIX when
// WEBP_RESCALER_RFIX is 32 (defined outside this file -- TODO confirm).
static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const rescaler_t* frow = wrk->frow;
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  int in_range;               // asm scratch: 1 if the current value is < 256
  const int max_value = 255;  // saturation value for the output bytes
  // Byte length of the part of the row processed four samples at a time
  // (sample count rounded down to a multiple of 4, times 4 bytes per sample
  // as implied by the 'lw' offsets below).
  const int temp6 = (x_out_max & ~0x3) << 2;
  const int temp7 = (int)wrk->fy_scale;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(wrk->y_expand);
  assert(wrk->y_sub != 0);
  if (wrk->y_accum == 0) {
    if (x_out_max >= 4) {
      __asm__ volatile (
        // temp4 * temp5 = 2^31: rounding bias preloaded by each 'mult'.
        "li %[temp4], 0x10000 \n\t"
        "li %[temp5], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
      "1: \n\t"
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        // Pointers are advanced early; the stores below use negative offsets.
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        // v = hi32(2^31 + frow[k] * fy_scale)
        "mult $ac0, %[temp4], %[temp5] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp4], %[temp5] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp4], %[temp5] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp4], %[temp5] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // Clamp exactly like the scalar tail: signed 'v > 255 ? 255 : v'.
        // slti sets in_range = (v < 256); movz replaces v with 255 when
        // in_range == 0.
        "slti %[in_range], %[temp0], 256 \n\t"
        "movz %[temp0], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp1], 256 \n\t"
        "movz %[temp1], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp2], 256 \n\t"
        "movz %[temp2], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp3], 256 \n\t"
        "movz %[temp3], %[max_value], %[in_range] \n\t"
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2),
          [in_range]"=&r"(in_range)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [max_value]"r"(max_value)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: remaining (x_out_max & 3) samples. frow/dst were advanced
    // by the assembly above when it ran.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t J = *frow++;
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    }
  } else {
    // Blend weights for frow (A) and irow (B); A + B == WEBP_RESCALER_ONE.
    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp8 * temp9 = 2^31: rounding bias preloaded by each 'mult'.
        "li %[temp8], 0x10000 \n\t"
        "li %[temp9], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
      "1: \n\t"
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        "lw %[temp4], 0(%[irow]) \n\t"
        "lw %[temp5], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        // J = hi32(2^31 + A * frow[k] + B * irow[k])
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[A], %[temp0] \n\t"
        "maddu $ac0, %[B], %[temp4] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[A], %[temp1] \n\t"
        "maddu $ac1, %[B], %[temp5] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[A], %[temp2] \n\t"
        "maddu $ac2, %[B], %[temp10] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[A], %[temp3] \n\t"
        "maddu $ac3, %[B], %[temp11] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // v = hi32(2^31 + J * fy_scale)
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // Clamp exactly like the scalar tail: signed 'v > 255 ? 255 : v'.
        "slti %[in_range], %[temp0], 256 \n\t"
        "movz %[temp0], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp1], 256 \n\t"
        "movz %[temp1], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp2], 256 \n\t"
        "movz %[temp2], %[max_value], %[in_range] \n\t"
        "slti %[in_range], %[temp3], 256 \n\t"
        "movz %[temp3], %[max_value], %[in_range] \n\t"
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2),
          [in_range]"=&r"(in_range)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B),
          [max_value]"r"(max_value)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: remaining (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint64_t I = (uint64_t)A * *frow++
                       + (uint64_t)B * *irow++;
      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    }
  }
}

#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER

//------------------------------------------------------------------------------
// Entry point

// Forward declaration of the init function defined immediately below.
extern void WebPRescalerDspInitMIPSdspR2(void);

// Installs the MIPS DSP R2 row exporter into the rescaler function pointers.
// Only WebPRescalerExportRowExpand is overridden; the shrink exporter in this
// file is compiled out (#if 0), so its assignment below stays commented out.
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
  WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
//  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
}

#else  // !WEBP_USE_MIPS_DSP_R2 || WEBP_REDUCE_SIZE

// Fallback when the DSP R2 code above is not compiled: the init function is
// generated by the WEBP_DSP_INIT_STUB macro (defined outside this file).
WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)

#endif  // WEBP_USE_MIPS_DSP_R2 && !WEBP_REDUCE_SIZE

Messung V0.5

¤ Dauer der Verarbeitung: 0.10 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.