;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
;
%include"third_party/x86inc/x86inc.asm"
SECTION .text
; 'spill_src_stride' strongly affects how the code works.
;
; When 'spill_src_stride' is false, 'src_strideq' resides in a
; register, so [srcq + src_strideq + offset] is allowed; we can simply
; use that form to access src memory and don't need to update 'srcq'
; at each line. We only update 'srcq' every two lines, using a compact
; LEA instruction like [srcq+src_strideq*2].
;
; When 'spill_src_stride' is true, 'src_strideq' resides in memory.
; We cannot use the above addressing form, so we have to update
; 'srcq' at each line break. As we process two parts (first, second)
; together in each macro function, the second part may also sit
; on the next line, which means we may also need to add
; one 'src_strideq' to 'srcq' before processing the second part.
; This is specifically designed to handle the case where src_strideq is a
; memory location; in that case we cannot perform complex address
; calculation using LEA, and fall back to a simple ADD instruction at
; each line ending.
; Advance srcq and all four reference pointers past the two lines just
; processed. Clobbers srcq and ref1q-ref4q.
%macro ADVANCE_END_OF_TWO_LINES 0
%if spill_src_stride
  ; src_strideq is spilled to memory: scaled LEA addressing is unavailable,
  ; so advance with a plain ADD (srcq was presumably already advanced once
  ; mid-pair by the processing macros — TODO confirm against callers).
  add                   srcq, src_strideq
%else
  ; src_strideq is in a register: skip both lines with a single LEA.
  lea                   srcq, [srcq+src_strideq*2]
%endif

; note: ref_stride is never spilled when processing two lines
  lea                  ref1q, [ref1q+ref_strideq*2]
  lea                  ref2q, [ref2q+ref_strideq*2]
  lea                  ref3q, [ref3q+ref_strideq*2]
  lea                  ref4q, [ref4q+ref_strideq*2]
%endmacro
; Whether a shared offset should be used instead of adding strides to
; each reference array. With this option, only one line will be processed
; per loop iteration.
%define use_ref_offset (%1 >= mmsize)
; Remove loops in the 4x4 and 8x4 case
%define use_loop (use_ref_offset || %2 > 4)
; Is the loop for this wxh in another function?
; If so, we jump into that function for the loop and returning
%define external_loop (use_ref_offset && %1 > mmsize && %1 != %2)
%if use_ref_offset
  ; First row of the first column is handled outside the loop body.
  PROCESS_FIRST_MMSIZE
%if %1 > mmsize
  mov            ref_offsetq, 0
  mov                   cntd, %2 >> %3
; Jump part way into the loop for the square version of this width
%if %3 == 1
  jmp mangle(private_prefix %+ _sad_skip_%1x%1x4d %+ SUFFIX).midloop
%else
  jmp mangle(private_prefix %+ _sad%1x%1x4d %+ SUFFIX).midloop
%endif
%else
  mov            ref_offsetq, ref_strideq
  add                   srcq, src_strideq
  mov                   cntd, (%2 >> %3) - 1
%endif
%if external_loop == 0
.loop:
; Unrolled horizontal loop
%assign h_offset 0
%rep %1/mmsize
  PROCESS_16x1x4 h_offset
%if h_offset == 0
; The first row of the first column is done outside the loop and jumps here
.midloop:
%endif
%assign h_offset h_offset+mmsize
%endrep
; NOTE(review): the following trailing text is not assembly source; it looks
; like an unrelated German website disclaimer pasted into the file. It is
; preserved below as comments so the file remains assemblable — confirm it
; can be removed entirely.
; Die Informationen auf dieser Webseite wurden
; nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
; Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten
; Informationen zugesichert.
; Bemerkung:
; Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.