; ; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2) ; ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB ; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander. ; Copyright (C) 2018, Matthias Räncker. ; Copyright (C) 2023, Aliaksiej Kandracienka. ; ; Based on the x86 SIMD extension for IJG JPEG library ; Copyright (C) 1999-2006, MIYASAKA Masaru. ; For conditions of distribution and use, see copyright notice in jsimdext.inc ; ; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include"jcolsamp.inc"
; --------------------------------------------------------------------------
;
; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
;
; GLOBAL(void)
; jsimd_h2v1_merged_upsample_sse2(JDIMENSION output_width,
;                                 JSAMPIMAGE input_buf,
;                                 JDIMENSION in_row_group_ctr,
;                                 JSAMPARRAY output_buf);
;
; Stack frame built by the prologue below:
;   [rbp]      caller's rbp
;   [rbp-8]    caller's r15
;   r15        upper bound of the XMMWORD-aligned wk[] scratch area; wk[]
;              occupies the SIZEOF_XMMWORD * WK_NUM bytes just below r15
; rbx is pushed last, after COLLECT_ARGS has run, so the epilogue must pop it
; before UNCOLLECT_ARGS.
;
; NOTE(review): COLLECT_ARGS / UNCOLLECT_ARGS and WK_NUM are defined outside
; the visible part of this file; presumably COLLECT_ARGS moves the four C
; arguments into fixed registers -- confirm in the shared include file.

EXTN(jsimd_h2v1_merged_upsample_sse2):
    ENDBR64
    push        rbp
    mov         rbp, rsp
    push        r15
    and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
    ; Allocate stack space for wk array.  r15 is used to access it.
    mov         r15, rsp
    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
    COLLECT_ARGS 4
    push        rbx
; Tail store for 3-byte pixels: write the last, partial group of output
; bytes without overrunning the row.
;
; In:    rcx  = remaining count, multiplied by 3 below to become a byte count
;        rdi  = output pointer (outptr)
;        xmmA, xmmD, xmmF = pending output bytes, in memory order
; Clobb: rax, rcx, rdi, xmmA, flags
;
; Each step stores the largest chunk that still fits (32, 16, 8, 4, 2, 1
; bytes), advances rdi, decrements rcx and shifts the not-yet-written bytes
; down to the low end of xmmA (later: rax).
;
; NOTE(review): a second set of .column_st32/.column_st16/... labels appears
; further down, followed by "%endif ; RGB_PIXELSIZE".  The two sets are
; presumably alternative arms of an %if/%else on RGB_PIXELSIZE whose
; directive lines are not visible here -- confirm.

.column_st32:
    lea         rcx, [rcx+rcx*2]            ; imul ecx, RGB_PIXELSIZE
    cmp         rcx, byte 2*SIZEOF_XMMWORD
    jb          short .column_st16
    movdqu      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
    movdqu      XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
    add         rdi, byte 2*SIZEOF_XMMWORD  ; outptr
    movdqa      xmmA, xmmF                  ; third register holds the rest
    sub         rcx, byte 2*SIZEOF_XMMWORD
    ; The 16-byte step is skipped on this path.
    ; NOTE(review): assumes fewer than SIZEOF_XMMWORD bytes remain here --
    ; confirm against the code that branches to .column_st32.
    jmp         short .column_st15

.column_st16:
    cmp         rcx, byte SIZEOF_XMMWORD
    jb          short .column_st15
    movdqu      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
    add         rdi, byte SIZEOF_XMMWORD    ; outptr
    movdqa      xmmA, xmmD                  ; second register holds the rest
    sub         rcx, byte SIZEOF_XMMWORD

.column_st15:
    ; Store the lower 8 bytes of xmmA to the output when it has enough
    ; space.
    cmp         rcx, byte SIZEOF_MMWORD
    jb          short .column_st7
    movq        XMM_MMWORD [rdi], xmmA
    add         rdi, byte SIZEOF_MMWORD
    sub         rcx, byte SIZEOF_MMWORD
    psrldq      xmmA, SIZEOF_MMWORD         ; drop the bytes just written

.column_st7:
    ; Store the lower 4 bytes of xmmA to the output when it has enough
    ; space.
    cmp         rcx, byte SIZEOF_DWORD
    jb          short .column_st3
    movd        XMM_DWORD [rdi], xmmA
    add         rdi, byte SIZEOF_DWORD
    sub         rcx, byte SIZEOF_DWORD
    psrldq      xmmA, SIZEOF_DWORD          ; drop the bytes just written

.column_st3:
    ; Store the lower 2 bytes of rax to the output when it has enough
    ; space.
    movd        eax, xmmA                   ; 32-bit write zero-extends into rax
    cmp         rcx, byte SIZEOF_WORD
    jb          short .column_st1
    mov         word [rdi], ax
    add         rdi, byte SIZEOF_WORD
    sub         rcx, byte SIZEOF_WORD
    shr         rax, 16                     ; next pending byte moves into al

.column_st1:
    ; Store the lower 1 byte of rax to the output when it has enough
    ; space.
    test        rcx, rcx
    jz          short .endcolumn
    mov         byte [rdi], al
; Tail store for 4-byte pixels: write the last, partial group of output
; pixels without overrunning the row.
;
; In:    rcx  = remaining count in pixels (the comments below equate two
;               pixels with 8 bytes)
;        rdi  = output pointer (outptr)
;        xmmA, xmmD, xmmC, xmmH = pending output pixels, in memory order
; Clobb: rcx, rdi, xmmA, xmmD, flags
;
; The thresholds are written as fractions of SIZEOF_XMMWORD because rcx
; counts pixels while the stores move bytes: two XMMWORDs hold
; SIZEOF_XMMWORD/2 pixels, one XMMWORD holds SIZEOF_XMMWORD/4, and one
; MMWORD-sized store holds SIZEOF_XMMWORD/8.
;
; NOTE(review): an earlier set of .column_st32/.column_st16/... labels exists
; above, and "%endif ; RGB_PIXELSIZE" follows this code.  The two sets are
; presumably alternative arms of an %if/%else on RGB_PIXELSIZE whose
; directive lines are not visible here -- confirm.

.column_st32:
    cmp         rcx, byte SIZEOF_XMMWORD/2
    jb          short .column_st16
    movdqu      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
    movdqu      XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
    add         rdi, byte 2*SIZEOF_XMMWORD  ; outptr
    movdqa      xmmA, xmmC                  ; shift the next two registers
    movdqa      xmmD, xmmH                  ; down into xmmA/xmmD
    sub         rcx, byte SIZEOF_XMMWORD/2

.column_st16:
    cmp         rcx, byte SIZEOF_XMMWORD/4
    jb          short .column_st15
    movdqu      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
    add         rdi, byte SIZEOF_XMMWORD    ; outptr
    movdqa      xmmA, xmmD                  ; second register holds the rest
    sub         rcx, byte SIZEOF_XMMWORD/4

.column_st15:
    ; Store two pixels (8 bytes) of xmmA to the output when it has enough
    ; space.
    cmp         rcx, byte SIZEOF_XMMWORD/8
    jb          short .column_st7
    movq        XMM_MMWORD [rdi], xmmA
    add         rdi, byte SIZEOF_XMMWORD/8*4
    sub         rcx, byte SIZEOF_XMMWORD/8
    psrldq      xmmA, SIZEOF_XMMWORD/8*4    ; drop the pixels just written

.column_st7:
    ; Store one pixel (4 bytes) of xmmA to the output when it has enough
    ; space.
    test        rcx, rcx
    jz          short .endcolumn
    movd        XMM_DWORD [rdi], xmmA
%endif  ; RGB_PIXELSIZE ; ---------------

; Common exit path.  Undoes the prologue in reverse order: rbx was pushed
; after COLLECT_ARGS, so it is popped before UNCOLLECT_ARGS.

.endcolumn:
    sfence                              ; flush the write buffer

.return:
    pop         rbx
    UNCOLLECT_ARGS 4
    ; rsp was realigned in the prologue, so it cannot simply be popped back;
    ; recompute it from the frame pointer.  [rbp-8] is the slot where r15 was
    ; pushed right after rbp.
    lea         rsp, [rbp-8]
    pop         r15
    pop         rbp
    ret
; --------------------------------------------------------------------------
;
; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
;
; GLOBAL(void)
; jsimd_h2v2_merged_upsample_sse2(JDIMENSION output_width,
;                                 JSAMPIMAGE input_buf,
;                                 JDIMENSION in_row_group_ctr,
;                                 JSAMPARRAY output_buf);
;
; Die Informationen auf dieser Webseite wurden
; nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
; noch Qualität der bereitgestellten Informationen zugesichert.
; Bemerkung:
; Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.