Quelle pixman-arm-simd-asm-scaled.S

Sprache: ARM-Assembler

/*
* Copyright © 2008 Mozilla Corporation
* Copyright © 2010 Nokia Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission.  Mozilla Corporation makes no
* representations about the suitability of this software for any purpose.  It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author:  Jeff Muizelaar (jeff@infidigm.net)
*
*/

/*
 * Prevent the stack from becoming executable: on Linux/ELF an empty
 * .note.GNU-stack section marks this object as not requiring an
 * executable stack.
 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/* Everything below is emitted into the code section. */
.text
/* Let the assembler accept instructions up to ARMv6 ... */
.arch armv6
/*
 * ... but record ARMv4 as the architecture in the object file.
 * NOTE(review): presumably so this object can be linked into builds that
 * target older cores, with these routines selected at run time - confirm.
 */
.object_arch armv4
/* Assemble ARM (32-bit) encodings, not Thumb. */
.arm
/* Enable the GNU assembler's alternate macro syntax. */
.altmacro
/* Align to a 2^2 = 4 byte boundary. */
.p2align 2

#include "pixman-arm-asm.h"

/*
 * NOTE(review): pixman_syntax_unified is not defined in this file; it
 * presumably comes from pixman-arm-asm.h and selects unified assembler
 * syntax, which would match the mnemonic spelling used below
 * (e.g. "subspl", condition code last) - confirm.
 */
pixman_syntax_unified

/*
* Note: This code is only using armv5te instructions (not even armv6),
*       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
*       be split into a few variants, tuned for each microarchitecture.
*
* TODO: In order to get good performance on ARM9/ARM11 cores (which don't
* have efficient write combining), it needs to be changed to use 16-byte
* aligned writes using STM instruction.
*
* Nearest scanline scaler macro template uses the following arguments:
*  fname                     - name of the function to generate
*  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
*  t                         - type suffix for LDR/STR instructions
*  prefetch_distance         - prefetch in the source image by that many
*                              pixels ahead
*  prefetch_braking_distance - stop prefetching when that many pixels are
*                              remaining before the end of scanline
*/

/*
 * Register and stack interface of each generated function, as read from the
 * code below:
 *   r0 (W)    - number of pixels to write
 *   r1 (DST)  - destination pointer, advanced by one pixel per store
 *   r2 (SRC)  - base address for the source loads
 *   r3 (VX)   - source position; its integer part starts at bit 16
 *   [sp]      - UNIT_X, the step added to VX for every output pixel
 *   [sp, #4]  - SRC_WIDTH_FIXED, subtracted from VX while VX is non-negative
 * r4-r8 and r10 are saved on entry and restored before returning.
 *
 * NOTE(review): the wrap loops keep VX negative, so SRC presumably points
 * past the end of the source row - confirm against the C caller.
 */
.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

/*
 * NOTE(review): pixman_asm_function / pixman_end_asm_function are not
 * defined in this file (presumably pixman-arm-asm.h); they bracket the
 * function body here.
 */
pixman_asm_function \fname
/* Symbolic register names; all of them are released again with .unreq. */
W  .req r0               /* pixel counter (biased inside the loops)       */
DST  .req r1             /* destination write pointer                      */
SRC  .req r2             /* source base address                            */
VX  .req r3              /* current source position                        */
UNIT_X  .req ip          /* per-pixel increment of VX                      */
TMP1  .req r4            /* byte offset of a pixel, then the pixel itself  */
TMP2  .req r5            /* same role as TMP1, used for every other pixel  */
VXMASK  .req r6          /* mask clearing the low bpp_shift offset bits    */
PF_OFFS  .req r7         /* source position used for prefetching           */
SRC_WIDTH_FIXED .req r8  /* amount subtracted from VX to wrap it           */

/* Fetch the first stack argument before the push below moves sp. */
ldr UNIT_X, [sp]
/*
 * r10 is not referenced anywhere in this macro.
 * NOTE(review): presumably it is saved only to keep the pushed register
 * count even (8-byte stack alignment) - confirm.
 */
push {r4, r5, r6, r7, r8, r10}
/* VXMASK = ~(pixel size in bytes - 1) */
mvn VXMASK, #((1 << \bpp_shift) - 1)
/* 24 bytes were just pushed, so this reads the word that was at [sp, #4]
 * on entry. */
ldr SRC_WIDTH_FIXED, [sp, #28]

/*
 * Helper macro: copy two pixels.
 * On entry TMP1 holds the byte offset (relative to SRC) of the next pixel
 * to load; on exit it holds the offset of the pixel after the two that were
 * copied. TMP1 and TMP2 alternate so that the offset for the following
 * pixel is computed while the current pixel is still being loaded/stored.
 */
.macro scale_2_pixels
  /* pixel A: load it, compute the offset of pixel B, advance VX, store A */
  ldr\()\t TMP1, [SRC, TMP1]
  and TMP2, VXMASK, VX, asr #(16 - \bpp_shift)
  adds VX, VX, UNIT_X
  str\()\t TMP1, [DST], #(1 << \bpp_shift)
  /* while VX is non-negative, subtract SRC_WIDTH_FIXED; the flags tested
   * come from the adds above and then from each subs */
9:  subspl VX, VX, SRC_WIDTH_FIXED
  bpl 9b

  /* pixel B: same sequence with the roles of TMP1 and TMP2 swapped */
  ldr\()\t TMP2, [SRC, TMP2]
  and TMP1, VXMASK, VX, asr #(16 - \bpp_shift)
  adds VX, VX, UNIT_X
  str\()\t TMP2, [DST], #(1 << \bpp_shift)
9:  subspl VX, VX, SRC_WIDTH_FIXED
  bpl 9b
.endm

/* now do the scaling */
/* Prime TMP1 with the byte offset of the first pixel,
 * (VX asr 16) * pixel size, then step VX and wrap it as in the helper. */
and TMP1, VXMASK, VX, asr #(16 - \bpp_shift)
adds VX, VX, UNIT_X
9: subspl VX, VX, SRC_WIDTH_FIXED
bpl 9b
/* Bias W so that the prefetching loop is entered only when at least
 * 8 + prefetch_braking_distance pixels remain. */
subs W, W, #(8 + \prefetch_braking_distance)
blt 2f
/* calculate prefetch offset */
/* PF_OFFS = VX + UNIT_X * prefetch_distance */
mov PF_OFFS, #\prefetch_distance
mla PF_OFFS, UNIT_X, PF_OFFS, VX
1: /* main loop, process 8 pixels per iteration with prefetch */
/* PF_OFFS is neither masked nor wrapped by SRC_WIDTH_FIXED; it is only
 * used as the address of this prefetch hint. */
pld [SRC, PF_OFFS, asr #(16 - \bpp_shift)]
/* advance the prefetch position by 8 pixel steps */
add PF_OFFS, PF_OFFS, UNIT_X, lsl #3
scale_2_pixels
scale_2_pixels
scale_2_pixels
scale_2_pixels
subs W, W, #8
bge 1b
2:
/* Replace the bias above with a bias of 4: W = pixels remaining - 4. */
subs W, W, #(4 - 8 - \prefetch_braking_distance)
blt 2f
1: /* process the remaining pixels */
/* four pixels per iteration, no prefetch */
scale_2_pixels
scale_2_pixels
subs W, W, #4
bge 1b
2:
/* Everything subtracted from W so far adds up to a multiple of 4, so
 * bits 1 and 0 of W still equal those of the original pixel count. */
tst W, #2
beq 2f
scale_2_pixels
2:
/* Last odd pixel: load and store only if bit 0 of W is set; DST is not
 * advanced by this store. */
tst W, #1
ldr\()\t\()ne TMP1, [SRC, TMP1]
str\()\t\()ne TMP1, [DST]
/* cleanup helper macro */
/* Drop the helper and the register aliases so that the next expansion of
 * this macro can define them again. These are assembler directives and
 * emit no instructions. */
.purgem scale_2_pixels
.unreq DST
.unreq SRC
.unreq W
.unreq VX
.unreq UNIT_X
.unreq TMP1
.unreq TMP2
.unreq VXMASK
.unreq PF_OFFS
.unreq  SRC_WIDTH_FIXED
/* return */
/* restore exactly the registers pushed on entry */
pop {r4, r5, r6, r7, r8, r10}
bx lr
pixman_end_asm_function
.endm

/*
 * 2-byte pixels: bpp_shift = 1 and the "h" suffix turns the loads/stores
 * into ldrh/strh. Prefetch 80 pixels ahead; stop prefetching 32 pixels
 * before the end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/*
 * 4-byte pixels: bpp_shift = 2 and an empty suffix, i.e. plain word
 * ldr/str. Prefetch 48 pixels ahead; stop prefetching 32 pixels before the
 * end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet am 2026-04-28) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.