Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/Firefox/gfx/ycbcr/   (Browser von der Mozilla Stiftung Version 136.0.1©)  Datei vom 10.2.2025 mit Größe 24 kB image not shown  

Quelle  yuv_row_posix.cpp

  Sprache: C
 

// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "yuv_row.h"
#include "mozilla/SSE.h"

#define DCHECK(a)

extern "C" {

#if defined(ARCH_CPU_X86_64)

// We don't need CPUID guards here, since x86-64 implies SSE2.

// AMD64 ABI uses register paremters.
void FastConvertYUVToRGB32Row(const uint8_t* y_buf,  // rdi
                              const uint8_t* u_buf,  // rsi
                              const uint8_t* v_buf,  // rdx
                              uint8_t* rgb_buf,      // rcx
                              int width) {         // r8
  asm volatile(
  "jmp    1f\n"
"0:"
  "movzb  (%[u_buf]),%%r10\n"
  "add    $0x1,%[u_buf]\n"
  "movzb  (%[v_buf]),%%r11\n"
  "add    $0x1,%[v_buf]\n"
  "movq   2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
  "movzb  (%[y_buf]),%%r10\n"
  "movq   4096(%[kCoefficientsRgbY],%%r11,8),%%xmm1\n"
  "movzb  0x1(%[y_buf]),%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq   (%[kCoefficientsRgbY],%%r10,8),%%xmm2\n"
  "add    $0x2,%[y_buf]\n"
  "movq   (%[kCoefficientsRgbY],%%r11,8),%%xmm3\n"
  "paddsw %%xmm0,%%xmm2\n"
  "paddsw %%xmm0,%%xmm3\n"
  "shufps $0x44,%%xmm3,%%xmm2\n"
  "psraw  $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movq   %%xmm2,0x0(%[rgb_buf])\n"
  "add    $0x8,%[rgb_buf]\n"
"1:"
  "sub    $0x2,%[width]\n"
  "jns    0b\n"

"2:"
  "add    $0x1,%[width]\n"
  "js     3f\n"

  "movzb  (%[u_buf]),%%r10\n"
  "movq   2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
  "movzb  (%[v_buf]),%%r10\n"
  "movq   4096(%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movzb  (%[y_buf]),%%r10\n"
  "movq   (%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw  $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd   %%xmm1,0x0(%[rgb_buf])\n"
"3:"
  : [y_buf] "+r"(y_buf),
    [u_buf] "+r"(u_buf),
    [v_buf] "+r"(v_buf),
    [rgb_buf] "+r"(rgb_buf),
    [width] "+r"(width)
  : [kCoefficientsRgbY] "r" (kCoefficientsRgbY)
  : "cc""memory""r10""r11""xmm0""xmm1""xmm2""xmm3"
);
}

void ScaleYUVToRGB32Row(const uint8_t* y_buf,  // rdi
                        const uint8_t* u_buf,  // rsi
                        const uint8_t* v_buf,  // rdx
                        uint8_t* rgb_buf,      // rcx
                        int width,           // r8
                        int source_dx) {     // r9
  asm volatile(
  "xor    %%r11,%%r11\n"
  "sub    $0x2,%[width]\n"
  "js     1f\n"

"0:"
  "mov    %%r11,%%r10\n"
  "sar    $0x11,%%r10\n"
  "movzb  (%[u_buf],%%r10,1),%%rax\n"
  "movq   2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
  "movzb  (%[v_buf],%%r10,1),%%rax\n"
  "movq   4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
  "lea    (%%r11,%[source_dx]),%%r10\n"
  "sar    $0x10,%%r11\n"
  "movzb  (%[y_buf],%%r11,1),%%rax\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
  "lea    (%%r10,%[source_dx]),%%r11\n"
  "sar    $0x10,%%r10\n"
  "movzb  (%[y_buf],%%r10,1),%%rax\n"
  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm2\n"
  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"
  "psraw  $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq   %%xmm1,0x0(%[rgb_buf])\n"
  "add    $0x8,%[rgb_buf]\n"
  "sub    $0x2,%[width]\n"
  "jns    0b\n"

"1:"
  "add    $0x1,%[width]\n"
  "js     2f\n"

  "mov    %%r11,%%r10\n"
  "sar    $0x11,%%r10\n"
  "movzb  (%[u_buf],%%r10,1),%%rax\n"
  "movq   2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
  "movzb  (%[v_buf],%%r10,1),%%rax\n"
  "movq   4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "sar    $0x10,%%r11\n"
  "movzb  (%[y_buf],%%r11,1),%%rax\n"
  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw  $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd   %%xmm1,0x0(%[rgb_buf])\n"

"2:"
  : [rgb_buf] "+r"(rgb_buf),
    [width] "+r"(width)
  : [y_buf] "r"(y_buf),
    [u_buf] "r"(u_buf),
    [v_buf] "r"(v_buf),
    [kCoefficientsRgbY] "r" (kCoefficientsRgbY),
    [source_dx] "r"(static_cast<long>(source_dx))
  : "cc""memory""r10""r11""rax""xmm0""xmm1""xmm2"
);
}

void LinearScaleYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width,
                              int source_dx) {
  asm volatile(
  "xor    %%r11,%%r11\n"   // x = 0
  "sub    $0x2,%[width]\n"
  "js     2f\n"
  "cmp    $0x20000,%[source_dx]\n"   // if source_dx >= 2.0
  "jl     0f\n"
  "mov    $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
"0:"

"1:"
  "mov    %%r11,%%r10\n"
  "sar    $0x11,%%r10\n"

  "movzb  (%[u_buf], %%r10, 1), %%r13 \n"
  "movzb  1(%[u_buf], %%r10, 1), %%r14 \n"
  "mov    %%r11, %%rax \n"
  "and    $0x1fffe, %%rax \n"
  "imul   %%rax, %%r14 \n"
  "xor    $0x1fffe, %%rax \n"
  "imul   %%rax, %%r13 \n"
  "add    %%r14, %%r13 \n"
  "shr    $17, %%r13 \n"
  "movq   2048(%[kCoefficientsRgbY],%%r13,8), %%xmm0\n"

  "movzb  (%[v_buf], %%r10, 1), %%r13 \n"
  "movzb  1(%[v_buf], %%r10, 1), %%r14 \n"
  "mov    %%r11, %%rax \n"
  "and    $0x1fffe, %%rax \n"
  "imul   %%rax, %%r14 \n"
  "xor    $0x1fffe, %%rax \n"
  "imul   %%rax, %%r13 \n"
  "add    %%r14, %%r13 \n"
  "shr    $17, %%r13 \n"
  "movq   4096(%[kCoefficientsRgbY],%%r13,8), %%xmm1\n"

  "mov    %%r11, %%rax \n"
  "lea    (%%r11,%[source_dx]),%%r10\n"
  "sar    $0x10,%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"

  "movzb  (%[y_buf], %%r11, 1), %%r13 \n"
  "movzb  1(%[y_buf], %%r11, 1), %%r14 \n"
  "and    $0xffff, %%rax \n"
  "imul   %%rax, %%r14 \n"
  "xor    $0xffff, %%rax \n"
  "imul   %%rax, %%r13 \n"
  "add    %%r14, %%r13 \n"
  "shr    $16, %%r13 \n"
  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"

  "mov    %%r10, %%rax \n"
  "lea    (%%r10,%[source_dx]),%%r11\n"
  "sar    $0x10,%%r10\n"

  "movzb  (%[y_buf],%%r10,1), %%r13 \n"
  "movzb  1(%[y_buf],%%r10,1), %%r14 \n"
  "and    $0xffff, %%rax \n"
  "imul   %%rax, %%r14 \n"
  "xor    $0xffff, %%rax \n"
  "imul   %%rax, %%r13 \n"
  "add    %%r14, %%r13 \n"
  "shr    $16, %%r13 \n"
  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm2\n"

  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"
  "psraw  $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq   %%xmm1,0x0(%[rgb_buf])\n"
  "add    $0x8,%[rgb_buf]\n"
  "sub    $0x2,%[width]\n"
  "jns    1b\n"

"2:"
  "add    $0x1,%[width]\n"
  "js     3f\n"

  "mov    %%r11,%%r10\n"
  "sar    $0x11,%%r10\n"

  "movzb  (%[u_buf],%%r10,1), %%r13 \n"
  "movq   2048(%[kCoefficientsRgbY],%%r13,8),%%xmm0\n"

  "movzb  (%[v_buf],%%r10,1), %%r13 \n"
  "movq   4096(%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"

  "paddsw %%xmm1,%%xmm0\n"
  "sar    $0x10,%%r11\n"

  "movzb  (%[y_buf],%%r11,1), %%r13 \n"
  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"

  "paddsw %%xmm0,%%xmm1\n"
  "psraw  $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd   %%xmm1,0x0(%[rgb_buf])\n"

"3:"
  : [rgb_buf] "+r"(rgb_buf),
    [width] "+r"(width)
  : [y_buf] "r"(y_buf),
    [u_buf] "r"(u_buf),
    [v_buf] "r"(v_buf),
    [kCoefficientsRgbY] "r" (kCoefficientsRgbY),
    [source_dx] "r"(static_cast<long>(source_dx))
  : "cc""memory""r10""r11""r13""r14""rax""xmm0""xmm1""xmm2"
);
}

#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)

// PIC version is slower because less registers are available, so
// non-PIC is used on platforms where it is possible.
void FastConvertYUVToRGB32Row_SSE(const uint8_t* y_buf,
                                  const uint8_t* u_buf,
                                  const uint8_t* v_buf,
                                  uint8_t* rgb_buf,
                                  int width);
  asm(
  ".text\n"
  ".global FastConvertYUVToRGB32Row_SSE\n"
  ".type FastConvertYUVToRGB32Row_SSE, @function\n"
"FastConvertYUVToRGB32Row_SSE:\n"
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x2c(%esp),%esi\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x34(%esp),%ecx\n"
  "jmp    1f\n"

"0:"
  "movzbl (%edi),%eax\n"
  "add    $0x1,%edi\n"
  "movzbl (%esi),%ebx\n"
  "add    $0x1,%esi\n"
  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "movzbl (%edx),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
  "movzbl 0x1(%edx),%ebx\n"
  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
  "add    $0x2,%edx\n"
  "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"
"1:"
  "sub    $0x2,%ecx\n"
  "jns    0b\n"

  "and    $0x1,%ecx\n"
  "je     2f\n"

  "movzbl (%edi),%eax\n"
  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "movzbl (%esi),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
  "movzbl (%edx),%eax\n"
  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
  "paddsw %mm0,%mm1\n"
  "psraw  $0x6,%mm1\n"
  "packuswb %mm1,%mm1\n"
  "movd   %mm1,0x0(%ebp)\n"
"2:"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);

void FastConvertYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width)
{
  if (mozilla::supports_sse()) {
    FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
    return;
  }

  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}


void ScaleYUVToRGB32Row_SSE(const uint8_t* y_buf,
                            const uint8_t* u_buf,
                            const uint8_t* v_buf,
                            uint8_t* rgb_buf,
                            int width,
                            int source_dx);
  asm(
  ".text\n"
  ".global ScaleYUVToRGB32Row_SSE\n"
  ".type ScaleYUVToRGB32Row_SSE, @function\n"
"ScaleYUVToRGB32Row_SSE:\n"
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x2c(%esp),%esi\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x34(%esp),%ecx\n"
  "xor    %ebx,%ebx\n"
  "jmp    1f\n"

"0:"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%edi,%eax,1),%eax\n"
  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"
"1:"
  "sub    $0x2,%ecx\n"
  "jns    0b\n"

  "and    $0x1,%ecx\n"
  "je     2f\n"

  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%edi,%eax,1),%eax\n"
  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
  "paddsw %mm0,%mm1\n"
  "psraw  $0x6,%mm1\n"
  "packuswb %mm1,%mm1\n"
  "movd   %mm1,0x0(%ebp)\n"

"2:"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);

void ScaleYUVToRGB32Row(const uint8_t* y_buf,
                        const uint8_t* u_buf,
                        const uint8_t* v_buf,
                        uint8_t* rgb_buf,
                        int width,
                        int source_dx)
{
  if (mozilla::supports_sse()) {
    ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
                           width, source_dx);
    return;
  }

  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
                       width, source_dx);
}

void LinearScaleYUVToRGB32Row_SSE(const uint8_t* y_buf,
                                  const uint8_t* u_buf,
                                  const uint8_t* v_buf,
                                  uint8_t* rgb_buf,
                                  int width,
                                  int source_dx);
  asm(
  ".text\n"
  ".global LinearScaleYUVToRGB32Row_SSE\n"
  ".type LinearScaleYUVToRGB32Row_SSE, @function\n"
"LinearScaleYUVToRGB32Row_SSE:\n"
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x30(%esp),%ebp\n"

  // source_width = width * source_dx + ebx
  "mov    0x34(%esp), %ecx\n"
  "imull  0x38(%esp), %ecx\n"
  "mov    %ecx, 0x34(%esp)\n"

  "mov    0x38(%esp), %ecx\n"
  "xor    %ebx,%ebx\n"     // x = 0
  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
  "jl     1f\n"
  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
  "jmp    1f\n"

"0:"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"

  "movzbl (%edi,%eax,1),%ecx\n"
  "movzbl 1(%edi,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "andl   $0x1fffe, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0x1fffe, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $17, %ecx \n"
  "movq   kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"

  "mov    0x2c(%esp),%esi\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"

  "movzbl (%esi,%eax,1),%ecx\n"
  "movzbl 1(%esi,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "andl   $0x1fffe, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0x1fffe, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $17, %ecx \n"
  "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"

  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%ecx\n"
  "movzbl 1(%edx,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "andl   $0xffff, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0xffff, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $16, %ecx \n"
  "movq   kCoefficientsRgbY(,%ecx,8),%mm1\n"

  "cmp    0x34(%esp), %ebx\n"
  "jge    2f\n"

  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%ecx\n"
  "movzbl 1(%edx,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "andl   $0xffff, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0xffff, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $16, %ecx \n"
  "movq   kCoefficientsRgbY(,%ecx,8),%mm2\n"

  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"

"1:"
  "cmp    0x34(%esp), %ebx\n"
  "jl     0b\n"
  "popa\n"
  "ret\n"

"2:"
  "paddsw %mm0, %mm1\n"
  "psraw $6, %mm1\n"
  "packuswb %mm1, %mm1\n"
  "movd %mm1, (%ebp)\n"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);

void LinearScaleYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width,
                              int source_dx)
{
  if (mozilla::supports_sse()) {
    LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
                                 width, source_dx);
    return;
  }

  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
                             width, source_dx);
}

#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)

void PICConvertYUVToRGB32Row_SSE(const uint8_t* y_buf,
                                 const uint8_t* u_buf,
                                 const uint8_t* v_buf,
                                 uint8_t* rgb_buf,
                                 int width,
                                 const int16_t *kCoefficientsRgbY);

  asm(
  ".text\n"
#if defined(XP_MACOSX)
"_PICConvertYUVToRGB32Row_SSE:\n"
#else
"PICConvertYUVToRGB32Row_SSE:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x2c(%esp),%esi\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x38(%esp),%ecx\n"

  "jmp    1f\n"

"0:"
  "movzbl (%edi),%eax\n"
  "add    $0x1,%edi\n"
  "movzbl (%esi),%ebx\n"
  "add    $0x1,%esi\n"
  "movq   2048(%ecx,%eax,8),%mm0\n"
  "movzbl (%edx),%eax\n"
  "paddsw 4096(%ecx,%ebx,8),%mm0\n"
  "movzbl 0x1(%edx),%ebx\n"
  "movq   0(%ecx,%eax,8),%mm1\n"
  "add    $0x2,%edx\n"
  "movq   0(%ecx,%ebx,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"
"1:"
  "subl   $0x2,0x34(%esp)\n"
  "jns    0b\n"

  "andl   $0x1,0x34(%esp)\n"
  "je     2f\n"

  "movzbl (%edi),%eax\n"
  "movq   2048(%ecx,%eax,8),%mm0\n"
  "movzbl (%esi),%eax\n"
  "paddsw 4096(%ecx,%eax,8),%mm0\n"
  "movzbl (%edx),%eax\n"
  "movq   0(%ecx,%eax,8),%mm1\n"
  "paddsw %mm0,%mm1\n"
  "psraw  $0x6,%mm1\n"
  "packuswb %mm1,%mm1\n"
  "movd   %mm1,0x0(%ebp)\n"
"2:"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);

void FastConvertYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width)
{
  if (mozilla::supports_sse()) {
    PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
                                &kCoefficientsRgbY[0][0]);
    return;
  }

  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}

void PICScaleYUVToRGB32Row_SSE(const uint8_t* y_buf,
                               const uint8_t* u_buf,
                               const uint8_t* v_buf,
                               uint8_t* rgb_buf,
                               int width,
                               int source_dx,
                               const int16_t *kCoefficientsRgbY);

  asm(
  ".text\n"
#if defined(XP_MACOSX)
"_PICScaleYUVToRGB32Row_SSE:\n"
#else
"PICScaleYUVToRGB32Row_SSE:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x2c(%esp),%esi\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x3c(%esp),%ecx\n"
  "xor    %ebx,%ebx\n"
  "jmp    1f\n"

"0:"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%edi,%eax,1),%eax\n"
  "movq   2048(%ecx,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw 4096(%ecx,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   0(%ecx,%eax,8),%mm1\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   0(%ecx,%eax,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"
"1:"
  "subl   $0x2,0x34(%esp)\n"
  "jns    0b\n"

  "andl   $0x1,0x34(%esp)\n"
  "je     2f\n"

  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%edi,%eax,1),%eax\n"
  "movq   2048(%ecx,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"
  "movzbl (%esi,%eax,1),%eax\n"
  "paddsw 4096(%ecx,%eax,8),%mm0\n"
  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%eax\n"
  "movq   0(%ecx,%eax,8),%mm1\n"
  "paddsw %mm0,%mm1\n"
  "psraw  $0x6,%mm1\n"
  "packuswb %mm1,%mm1\n"
  "movd   %mm1,0x0(%ebp)\n"

"2:"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);

void ScaleYUVToRGB32Row(const uint8_t* y_buf,
                        const uint8_t* u_buf,
                        const uint8_t* v_buf,
                        uint8_t* rgb_buf,
                        int width,
                        int source_dx)
{
  if (mozilla::supports_sse()) {
    PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
                              &kCoefficientsRgbY[0][0]);
    return;
  }

  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}

void PICLinearScaleYUVToRGB32Row_SSE(const uint8_t* y_buf,
                                     const uint8_t* u_buf,
                                     const uint8_t* v_buf,
                                     uint8_t* rgb_buf,
                                     int width,
                                     int source_dx,
                                     const int16_t *kCoefficientsRgbY);

  asm(
  ".text\n"
#if defined(XP_MACOSX)
"_PICLinearScaleYUVToRGB32Row_SSE:\n"
#else
"PICLinearScaleYUVToRGB32Row_SSE:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x34(%esp),%ecx\n"
  "mov    0x3c(%esp),%edi\n"
  "xor    %ebx,%ebx\n"

  // source_width = width * source_dx + ebx
  "mov    0x34(%esp), %ecx\n"
  "imull  0x38(%esp), %ecx\n"
  "mov    %ecx, 0x34(%esp)\n"

  "mov    0x38(%esp), %ecx\n"
  "xor    %ebx,%ebx\n"     // x = 0
  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
  "jl     1f\n"
  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
  "jmp    1f\n"

"0:"
  "mov    0x28(%esp),%esi\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"

  "movzbl (%esi,%eax,1),%ecx\n"
  "movzbl 1(%esi,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "andl   $0x1fffe, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0x1fffe, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $17, %ecx \n"
  "movq   2048(%edi,%ecx,8),%mm0\n"

  "mov    0x2c(%esp),%esi\n"
  "mov    %ebx,%eax\n"
  "sar    $0x11,%eax\n"

  "movzbl (%esi,%eax,1),%ecx\n"
  "movzbl 1(%esi,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "andl   $0x1fffe, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0x1fffe, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $17, %ecx \n"
  "paddsw 4096(%edi,%ecx,8),%mm0\n"

  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%ecx\n"
  "movzbl 1(%edx,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "andl   $0xffff, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0xffff, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $16, %ecx \n"
  "movq   (%edi,%ecx,8),%mm1\n"

  "cmp    0x34(%esp), %ebx\n"
  "jge    2f\n"

  "mov    %ebx,%eax\n"
  "sar    $0x10,%eax\n"
  "movzbl (%edx,%eax,1),%ecx\n"
  "movzbl 1(%edx,%eax,1),%esi\n"
  "mov    %ebx,%eax\n"
  "add    0x38(%esp),%ebx\n"
  "andl   $0xffff, %eax \n"
  "imul   %eax, %esi \n"
  "xorl   $0xffff, %eax \n"
  "imul   %eax, %ecx \n"
  "addl   %esi, %ecx \n"
  "shrl   $16, %ecx \n"
  "movq   (%edi,%ecx,8),%mm2\n"

  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "add    $0x8,%ebp\n"

"1:"
  "cmp    %ebx, 0x34(%esp)\n"
  "jg     0b\n"
  "popa\n"
  "ret\n"

"2:"
  "paddsw %mm0, %mm1\n"
  "psraw $6, %mm1\n"
  "packuswb %mm1, %mm1\n"
  "movd %mm1, (%ebp)\n"
  "popa\n"
  "ret\n"
#if !defined(XP_MACOSX)
  ".previous\n"
#endif
);


void LinearScaleYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width,
                              int source_dx)
{
  if (mozilla::supports_sse()) {
    PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
                                    source_dx, &kCoefficientsRgbY[0][0]);
    return;
  }

  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
#else
void FastConvertYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width) {
  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}

void ScaleYUVToRGB32Row(const uint8_t* y_buf,
                        const uint8_t* u_buf,
                        const uint8_t* v_buf,
                        uint8_t* rgb_buf,
                        int width,
                        int source_dx) {
  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}

void LinearScaleYUVToRGB32Row(const uint8_t* y_buf,
                              const uint8_t* u_buf,
                              const uint8_t* v_buf,
                              uint8_t* rgb_buf,
                              int width,
                              int source_dx) {
  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
#endif

}

Messung V0.5 in Prozent
C=97 H=88 G=92

¤ Dauer der Verarbeitung: 0.19 Sekunden  (vorverarbeitet am  2026-04-26) ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.