// Set up the src pointers to include the left edge, for LR_HAVE_LEFT, left == NULL
tst r5, #1 // LR_HAVE_LEFT
beq 1f
// LR_HAVE_LEFT
cmp r1, #0
bne 0f
// left == NULL sub r2, r2, #6
vld1.16 {q2, q3}, [r2]!
b 2f
0:
// LR_HAVE_LEFT, left != NULL
vld1.16 {q2, q3}, [r2]!
vld1.16 {d3}, [r1]!
// Move r2 back to account for the last 3 pixels we loaded earlier,
// which we'll shift out. sub r2, r2, #6
vext.8 q3, q2, q3, #10
vext.8 q2, q1, q2, #10
b 2f
1:
vld1.16 {q2, q3}, [r2]!
// !LR_HAVE_LEFT, fill q1 with the leftmost pixel
// and shift q2/q3 to have 3x the first pixel at the front.
vdup.16 q1, d4[0]
// Move r2 back to account for the last 3 pixels we loaded before,
// which we shifted out. sub r2, r2, #6
vext.8 q3, q2, q3, #10
vext.8 q2, q1, q2, #10
2:
tst r5, #2 // LR_HAVE_RIGHT
bne 4f
3: // !LR_HAVE_RIGHT
// Check whether we need to pad the right edge
cmp r4, #11
bge 4f // If w >= 11, all used input pixels are valid
// 1 <= w < 11, w+3 pixels valid in q2-q3. For w=9 or w=10,
// this ends up called again; it's not strictly needed in those
// cases (we pad enough here), but keeping the code as simple as possible.
// The padding pixel is q1/2.h[w+2]. r2 points at the next input, ie
// q1/2.h[16]. Thus read from r2[w-14] to find the padding pixel. sub r12, r4, #14
lsl r12, r12, #1
// Insert padding in q2/3.h[w+3] onwards; fuse the +3 (*2) into the
// buffer pointer.
movrel_local r3, right_ext_mask, -6
ldrh r12, [r2, r12] sub r3, r3, r4, lsl #1
vdup.16 q11, r12
vld1.8 {q9, q10}, [r3]
ble 9f
vmov q2, q3
tst r5, #2 // LR_HAVE_RIGHT
vld1.16 {q3}, [r2]!
bne 4b // If we don't need to pad, just keep filtering.
b 3b // If we need to pad, check how many pixels we have left.
9:
pop {r4-r6,pc}
endfunc
// void dav1d_wiener_filter_v_16bpc_neon(pixel *dst, int16_t **ptrs,
//                                       const int16_t fv[8], const int w,
//                                       const int bitdepth_max);
//
// Register use that is visible below (AAPCS argument order assumed):
//   r1 = ptrs (pointer window that gets shifted down by one entry)
//   r2 = fv   (8 x int16 loaded into q0)
//   bitdepth_max is the first stack argument, read at sp + 92 after the
//   prologue has pushed 28 bytes (r4-r9, lr) and 64 bytes (q4-q7).
//
// NOTE(review): this listing is incomplete. Every instruction sits on its
// own line, but several stretches of code appear to be missing:
//   - no `4:` label exists in this span although `4f`/`4b` are branched to;
//   - the code after the pointer shift reuses r1/r2/r4/r5 as if they held
//     left/src/w/edges, which does not match the prologue of this function;
//   - r10/r11 are written further down but are not saved by the prologue;
//   - the only epilogue is `pop {r4-r7,pc}`, which does not undo
//     `push {r4-r9,lr}` + `vpush {q4-q7}`.
// Compare against the upstream file before assembling.
function wiener_filter_v_16bpc_neon, export=1
        push            {r4-r9,lr}
        vpush           {q4-q7}
        ldr             lr,  [sp, #92]  // bitdepth_max
        vld1.16         {q0}, [r2, :128]
        vdup.16         q2,  lr
        clz             lr,  lr
        sub             lr,  lr,  #11   // round_bits_v

        // Shift the pointers, but only update the first 5; the 6th pointer is
        // kept as it was before (and the 7th is implicitly identical to the
        // 6th).
        ldrd            r4,  r5,  [r1, #4]
        ldrd            r6,  r7,  [r1, #12]
        ldr             r8,       [r1, #20]
        strd            r4,  r5,  [r1]
        strd            r6,  r7,  [r1, #8]
        str             r8,       [r1, #16]

        // NOTE(review): the vertical filter loop is not visible here; what
        // follows is left/right edge handling for a horizontal pass.

        // Set up the src pointers to include the left edge, for LR_HAVE_LEFT, left == NULL
        tst             r5,  #1 // LR_HAVE_LEFT
        beq             1f
        // LR_HAVE_LEFT
        cmp             r1,  #0
        bne             0f
        // left == NULL
        sub             r2,  r2,  #6
        vld1.16         {q2, q3}, [r2]!
        b               2f

0:
        // LR_HAVE_LEFT, left != NULL
        vld1.16         {q2, q3}, [r2]!
        vld1.16         {d9},  [r1]!
        // Move r2 back to account for the last 3 pixels we loaded earlier,
        // which we'll shift out.
        sub             r2,  r2,  #6
        vext.8          q3,  q2,  q3,  #10
        vext.8          q2,  q4,  q2,  #10
        b               2f
1:
        vld1.16         {q2, q3}, [r2]!
        // !LR_HAVE_LEFT, fill q4 with the leftmost pixel
        // and shift q2/q3 to have 3x the first pixel at the front.
        vdup.16         q4,  d4[0]
        // Move r2 back to account for the last 3 pixels we loaded before,
        // which we shifted out.
        sub             r2,  r2,  #6
        vext.8          q3,  q2,  q3,  #10
        vext.8          q2,  q4,  q2,  #10

2:
        tst             r5,  #2 // LR_HAVE_RIGHT
        bne             4f

3:      // !LR_HAVE_RIGHT

        // Check whether we need to pad the right edge
        cmp             r4,  #11
        bge             4f   // If w >= 11, all used input pixels are valid

        // 1 <= w < 11, w+3 pixels valid in q2-q3. For w=9 or w=10,
        // this ends up called again; it's not strictly needed in those
        // cases (we pad enough here), but keeping the code as simple as possible.

        // The padding pixel is q1/2.h[w+2]. r2 points at the next input, ie
        // q1/2.h[16]. Thus read from r2[w-14] to find the padding pixel.
        sub             lr,  r4,  #14
        lsl             lr,  lr,  #1
        // Insert padding in q2/3.h[w+3] onwards; fuse the +3 (*2) into the
        // buffer pointer.
        movrel_local    r3,  right_ext_mask, -6
        ldrh            lr,  [r2, lr]
        sub             r3,  r3,  r4,  lsl #1
        vdup.16         q4,  lr
        vld1.8          {q8, q9}, [r3]

        // NOTE(review): the instructions that apply the q8/q9 mask and the
        // `4:` filter loop appear to be missing between here and `ble`; as
        // listed, the flags tested by `ble` come from `cmp r4, #11` above.
        ble             9f
        vmov            q2,  q3
        tst             r5,  #2 // LR_HAVE_RIGHT
        vld1.16         {q3}, [r2]!
        bne             4b // If we don't need to pad, just keep filtering.
        b               3b // If we need to pad, check how many pixels we have left.

9:
        // Reload ptrs from arguments on the stack
        // NOTE(review): with this function's prologue sp + 108 lies 16 bytes
        // past the first stack argument, and r10/r11 below are not saved by
        // `push {r4-r9,lr}` - this section presumably belongs to another
        // function; confirm.
        ldr             lr,  [sp, #108]

        // Rotate the window of pointers. Shift the 6 pointers downwards one step.
        ldrd            r6,  r7,  [lr, #4]
        ldrd            r8,  r9,  [lr, #12]
        ldrd            r10, r11, [lr, #20]

        strd            r6,  r7,  [lr]
        strd            r8,  r9,  [lr, #8]
        strd            r10, r11, [lr, #16]
        // The topmost pointer, ptrs[6], which isn't used as input, is set to
        // ptrs[0], which will be used as output for the next _hv call.
        // At the start of the filtering, the caller may set ptrs[6] to the
        // right next buffer to fill in, instead.
        str             r6,       [lr, #24]

        // NOTE(review): no return is visible here; execution falls through
        // into the edge setup below, which uses r3 as the source pointer.

        // LR_HAVE_LEFT && left == NULL
        sub             r3,  r3,  #4
        vld1.8          {q0, q1}, [r3]!
        b               2f

0:
        // LR_HAVE_LEFT, left != NULL
        vld1.8          {q0, q1}, [r3]!
        vld1.16         {d5},  [r2]
        // Move r3 back to account for the last 2 pixels we loaded earlier,
        // which we'll shift out.
        sub             r3,  r3,  #4
        vext.8          q1,  q0,  q1,  #12
        vext.8          q0,  q2,  q0,  #12
        b               2f

1:
        vld1.8          {q0, q1}, [r3]!
        // !LR_HAVE_LEFT, fill q2 with the leftmost pixel
        // and shift q0/q1 to have 2x the first pixel at the front.
        vdup.16         q2,  d0[0]
        // Move r3 back to account for the last 2 pixels we loaded before,
        // which we shifted out.
        sub             r3,  r3,  #4
        vext.8          q1,  q0,  q1,  #12
        vext.8          q0,  q2,  q0,  #12

2:
        tst             r5,  #2 // LR_HAVE_RIGHT
        bne             4f
        // If we'll need to pad the right edge, load that pixel to pad with
        // here since we can find it pretty easily from here.
        sub             lr,  r4,  #(2 + 16 - 2 + 1)
        lsl             lr,  lr,  #1
        ldrh            lr,  [r3, lr]
        // Fill q14 with the right padding pixel
        vdup.16         q14, lr
3:      // !LR_HAVE_RIGHT

        // Check whether we need to pad the right edge
        cmp             r4,  #10
        bge             4f   // If w >= 10, all used input pixels are valid

        // 1 <= w < 10, w pixels valid in q0-q1. For w=9, this ends up called
        // again; it's not strictly needed in those cases (we pad enough here),
        // but keeping the code as simple as possible.

        // Insert padding in q0.h[w] onwards
        movrel_local    lr,  right_ext_mask
        sub             lr,  lr,  r4,  lsl #1
        vld1.8          {q12, q13}, [lr]

        // NOTE(review): the use of the q12/q13 mask and the `4:` loop are
        // not visible; the code below starts another left-edge setup that
        // steps back 3 pixels instead of 2.

        // LR_HAVE_LEFT && left == NULL
        sub             r3,  r3,  #6
        vld1.8          {q0, q1}, [r3]!
        b               2f

0:
        // LR_HAVE_LEFT, left != NULL
        vld1.8          {q0, q1}, [r3]!
        vld1.16         {d5},  [r2]
        // Move r3 back to account for the last 3 pixels we loaded earlier,
        // which we'll shift out.
        sub             r3,  r3,  #6
        vext.8          q1,  q0,  q1,  #10
        vext.8          q0,  q2,  q0,  #10
        b               2f

1:
        vld1.8          {q0, q1}, [r3]!
        // !LR_HAVE_LEFT, fill q2 with the leftmost pixel
        // and shift q0/q1 to have 3x the first pixel at the front.
        vdup.16         q2,  d0[0]
        // Move r3 back to account for the last 3 pixels we loaded before,
        // which we shifted out.
        sub             r3,  r3,  #6
        vext.8          q1,  q0,  q1,  #10
        vext.8          q0,  q2,  q0,  #10

2:
        tst             r5,  #2 // LR_HAVE_RIGHT
        bne             4f
        // If we'll need to pad the right edge, load that pixel to pad with
        // here since we can find it pretty easily from here.
        sub             lr,  r4,  #(2 + 16 - 3 + 1)
        lsl             lr,  lr,  #1
        ldrh            lr,  [r3, lr]
        // Fill q14 with the right padding pixel
        vdup.16         q14, lr
3:      // !LR_HAVE_RIGHT

        // Check whether we need to pad the right edge
        cmp             r4,  #11
        bge             4f   // If w >= 11, all used input pixels are valid

        // 1 <= w < 11, w+1 pixels valid in q0-q1. For w=9 or w=10,
        // this ends up called again; it's not strictly needed in those
        // cases (we pad enough here), but keeping the code as simple as possible.

        // Insert padding in q0.h[w+1] onwards; fuse the +1 into the
        // buffer pointer.
        movrel_local    lr,  right_ext_mask, -2
        sub             lr,  lr,  r4,  lsl #1
        vld1.8          {q12, q13}, [lr]

        // NOTE(review): mask application and the `4:` summing loop appear
        // to be missing before this loop tail.
        ble             9f
        tst             r5,  #2 // LR_HAVE_RIGHT
        vmov            q0,  q1
        vld1.16         {q1}, [r3]!
        bne             4b // If we don't need to pad, just keep summing.
        b               3b // If we need to pad, check how many pixels we have left.

        // NOTE(review): nothing branches to the next three instructions in
        // this listing (they follow an unconditional `b` with no label).
        // From here on the source pointer is r5, w is r6 and edges is r7.

        // LR_HAVE_LEFT && left == NULL
        sub             r5,  r5,  #6
        vld1.8          {q0, q1}, [r5]!
        b               2f

0:
        // LR_HAVE_LEFT, left != NULL
        vld1.8          {q0, q1}, [r5]!
        vld1.16         {d5},  [r4]
        // Move r5 back to account for the last 3 pixels we loaded earlier,
        // which we'll shift out.
        sub             r5,  r5,  #6
        vext.8          q1,  q0,  q1,  #10
        vext.8          q0,  q2,  q0,  #10
        b               2f

1:
        vld1.8          {q0, q1}, [r5]!
        // !LR_HAVE_LEFT, fill q2 with the leftmost pixel
        // and shift q0/q1 to have 3x the first pixel at the front.
        vdup.16         q2,  d0[0]
        // Move r5 back to account for the last 3 pixels we loaded before,
        // which we shifted out.
        sub             r5,  r5,  #6
        vext.8          q1,  q0,  q1,  #10
        vext.8          q0,  q2,  q0,  #10

2:
        tst             r7,  #2 // LR_HAVE_RIGHT
        bne             4f
        // If we'll need to pad the right edge, load that pixel to pad with
        // here since we can find it pretty easily from here.
        sub             lr,  r6,  #(2 + 16 - 3 + 1)
        lsl             lr,  lr,  #1
        ldrh            lr,  [r5, lr]
        // Fill q14 with the right padding pixel
        vdup.16         q14, lr
3:      // !LR_HAVE_RIGHT

        // Check whether we need to pad the right edge
        cmp             r6,  #11
        bge             4f   // If w >= 11, all used input pixels are valid

        // 1 <= w < 11, w+1 pixels valid in q0-q1. For w=9 or w=10,
        // this ends up called again; it's not strictly needed in those
        // cases (we pad enough here), but keeping the code as simple as possible.

        // Insert padding in q0.h[w+1] onwards; fuse the +1 into the
        // buffer pointer.
        movrel_local    lr,  right_ext_mask, -2
        sub             lr,  lr,  r6,  lsl #1
        vld1.8          {q12, q13}, [lr]

        // NOTE(review): mask application and the `4:` summing loop appear
        // to be missing before this loop tail.
        ble             9f
        tst             r7,  #2 // LR_HAVE_RIGHT
        vmov            q0,  q1
        vld1.16         {q1}, [r5]!
        bne             4b // If we don't need to pad, just keep summing.
        b               3b // If we need to pad, check how many pixels we have left.

9:
        // NOTE(review): this restores r4-r7 only; it does not match the
        // `push {r4-r9,lr}` / `vpush {q4-q7}` prologue at the top.
        pop             {r4-r7,pc}
endfunc
// Invoke the sgr_funcs macro with the argument 16. The macro is defined
// outside this listing; presumably this emits the 16 bpc variants of the
// self-guided restoration functions - confirm against the macro definition.
sgr_funcs 16
// NOTE: the lines below are web-page footer text captured together with the
// listing; they are not part of the assembly source.
//
// Messung V0.5
// ¤ Dauer der Verarbeitung: 0.18 Sekunden
// (vorverarbeitet)
// ¤
// Die Informationen auf dieser Webseite wurden
// nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
// noch Qualität der bereitgestellten Informationen zugesichert.
// Bemerkung:
// Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.