;****************************************************************************** ;* Core video DSP functions ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> ;* ;* This file is part of FFmpeg. ;* ;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* ;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public ;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;******************************************************************************
%include"libavutil/x86/x86util.asm"
SECTION .text
; Slow vertical extension loop. Works with variable width, and does per-line
; reading/writing of source data.
;
; V_COPY_ROW type, h
;   %1 (type): top, body or bottom. Used to build unique local labels; only
;              "body" advances srcq after each row, so "top" and "bottom"
;              keep re-reading the same source row.
;   %2 (h):    register holding the number of rows to write. The row loop is
;              a do/while, so it must be non-zero on entry.
;
; Expects srcq/dstq to point at the END of the current row and r7mp to hold
; the negated row width, which is how emu_edge_vvar sets them up.
; Clobbers wq, m0 and flags.
; NOTE(review): the unconditional tail copy of the last mmsize bytes implies
; the row is at least mmsize bytes wide -- confirm against callers.
%macro V_COPY_ROW 2 ; type (top/body/bottom), h
.%1_y_loop:                                     ; do {
    mov              wq, r7mp                   ;   w = -width (r7mp = wmp)
.%1_x_loop:                                     ;   do {
    movu             m0, [srcq+wq]              ;     m0 = read(mmsize)
    movu      [dstq+wq], m0                     ;     write(m0, mmsize)
    add              wq, mmsize                 ;     w += mmsize (counts up to 0)
    cmp              wq, -mmsize                ;   } while (w < -mmsize);
    jl .%1_x_loop
    ; tail: copy the last mmsize bytes of the row; this may overlap bytes
    ; already written by the loop above
    movu             m0, [srcq-mmsize]          ;   m0 = read(mmsize)
    movu  [dstq-mmsize], m0                     ;   write(m0, mmsize)
%ifidn %1, body                                 ;   if (type == body) {
    add            srcq, src_strideq            ;     src += src_stride
%endif                                          ;   }
    add            dstq, dst_strideq            ;   dst += dst_stride
    dec              %2                         ; } while (--h);
    jnz .%1_y_loop
%endmacro
; emu_edge_vvar(dst, dst_stride, src, src_stride, start_y, end_y, bh, w)
;
; Fills bh rows of dst, each w bytes wide, from src:
;
; .----. <- zero
; |    |    <- top is copied from first line in body of source
; |----| <- start_y
; |    |    <- body is copied verbatim (line-by-line) from source
; |----| <- end_y
; |    |    <- bottom is copied from last line in body of source
; '----' <- bh
;
; The body loop runs unconditionally (do/while), so there must be at least
; one body row.
; NOTE(review): i.e. this assumes end_y > start_y -- confirm against callers.
; The stack copy of w (r7mp) is negated in place.
INIT_XMM sse
%if ARCH_X86_64
cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
                                start_y, end_y, bh, w
%else ; x86-32
; Only dst is auto-loaded here; the strides stay in their argument slots and
; the remaining arguments are loaded by hand into the renamed registers.
cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w
%define src_strideq r3mp
%define dst_strideq r1mp
    mov            srcq, r2mp
    mov        start_yq, r4mp
    mov          end_yq, r5mp
    mov             bhq, r6mp
%endif
    sub             bhq, end_yq                 ; bh    -= end_y   (rows below the body)
    sub          end_yq, start_yq               ; end_y -= start_y (rows in the body)
    ; point src/dst at the end of the row and keep -w in the argument slot,
    ; so V_COPY_ROW can index rows with a negative offset counting up to 0
    add            srcq, r7mp                   ; (r7mp = wmp)
    add            dstq, r7mp                   ; (r7mp = wmp)
    neg            r7mp                         ; (r7mp = wmp)
    test       start_yq, start_yq               ; if (start_y) {
    jz .body
    V_COPY_ROW      top, start_yq               ;   v_copy_row(top, start_y)
.body:                                          ; }
    V_COPY_ROW     body, end_yq                 ; v_copy_row(body, end_y)
    test            bhq, bhq                    ; if (bh) {
    jz .end
    ; the body loop left src one row past the last body row; step back
    sub            srcq, src_strideq            ;   src -= src_stride
    V_COPY_ROW   bottom, bhq                    ;   v_copy_row(bottom, bh)
.end:                                           ; }
    RET
; hvar_fn: emits emu_edge_hvar(dst, dst_stride, start_x, n_words, h).
;
; For each of h rows, reads the single byte at dst[start_x], replicates it
; across a full vector register and writes it over the 2*n_words bytes
; starting at dst, then advances dst by dst_stride.
;
; The splat sequence is chosen by cpuflag(avx2).
; NOTE(review): presumably the macro is expanded once per instruction set
; after a matching INIT_* directive -- those invocations are not in view.
; NOTE(review): the unconditional tail store of mmsize bytes implies
; 2*n_words >= mmsize -- confirm against callers.
%macro hvar_fn 0
cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
    lea            dstq, [dstq+n_wordsq*2]      ; dst -> end of the span to fill
    neg        n_wordsq                         ; n_words = -n_words
    lea        start_xq, [start_xq+n_wordsq*2]  ; dst+start_x is the same byte as before
.y_loop:                                        ; do {
%if cpuflag(avx2)
    vpbroadcastb     m0, [dstq+start_xq]        ;   m0 = splat(read(1))
    mov              wq, n_wordsq               ;   initialize w
%else
    ; w doubles as scratch for the pixel value before it becomes the counter
    movzx            wd, byte [dstq+start_xq]   ;   w = read(1)
    imul             wd, 0x01010101             ;   w *= 0x01010101 (byte -> 4 bytes)
    movd             m0, wd
    mov              wq, n_wordsq               ;   initialize w
    pshufd           m0, m0, q0000              ;   splat
%endif ; avx2
.x_loop:                                        ;   do {
    movu    [dstq+wq*2], m0                     ;     write(m0, mmsize)
    add              wq, mmsize/2               ;     w += mmsize/2 (counts up to 0)
    cmp              wq, -(mmsize/2)            ;   } while (w < -(mmsize/2))
    jl .x_loop
    ; tail: fill the last mmsize bytes of the span; this may overlap bytes
    ; already written by the loop above
    movu  [dstq-mmsize], m0                     ;   write(m0, mmsize)
    add            dstq, dst_strideq            ;   dst += dst_stride
    dec              hq                         ; } while (--h)
    jnz .y_loop
    RET
%endmacro
; macro to read/write a horizontal number of pixels (%2) to/from registers ; on sse, - fills xmm0-15 for consecutive sets of 16 pixels ; - if (%2 & 8) fills 8 bytes into xmm$next ; - if (%2 & 4) fills 4 bytes into xmm$next ; - if (%2 & 3) fills 1, 2 or 4 bytes in eax ; writing data out is in the same way
%macro READ_NUM_BYTES 2
%assign %%off 0 ; offset in source buffer
%assign %%xmm_idx 0 ; xmm register index
; left/right (horizontal) fast extend functions ; these are essentially identical to the vertical extend ones above, ; just left/right separated because number of pixels to extend is ; obviously not the same on both sides.
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.