/* * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Calculates the modular multiplicative inverse */ static av_always_inline int mulinv(int n, int m)
{
n = n % m; for (int x = 1; x < m; x++) if (((n * x) % m) == 1) return x;
av_assert0(0); /* Never reached */ return 0;
}
int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts, int d1, int d2)
{ constint sl = d1*d2;
s->map = av_malloc(s->len*sizeof(*s->map)); if (!s->map) return AVERROR(ENOMEM);
for (int k = 0; k < s->len; k += sl) { if (s->inv || (opts && opts->map_dir == FF_TX_MAP_SCATTER)) { for (int m = 0; m < d2; m++) for (int n = 0; n < d1; n++)
s->map[k + ((m*d1 + n*d2) % (sl))] = m*d1 + n;
} else { for (int m = 0; m < d2; m++) for (int n = 0; n < d1; n++)
s->map[k + m*d1 + n] = (m*d1 + n*d2) % (sl);
}
if (s->inv) for (int w = 1; w <= ((sl) >> 1); w++)
FFSWAP(int, s->map[k + w], s->map[k + sl - w]);
}
/* Guaranteed to work for any n, m where gcd(n, m) == 1 */ int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts, int inv, int n, int m)
{ int *in_map, *out_map; constint len = n*m; /* Will not be equal to s->len for MDCTs */ int m_inv, n_inv;
/* Make sure the numbers are coprime */ if (av_gcd(n, m) != 1) return AVERROR(EINVAL);
m_inv = mulinv(m, n);
n_inv = mulinv(n, m);
if (!(s->map = av_malloc(2*len*sizeof(*s->map)))) return AVERROR(ENOMEM);
in_map = s->map;
out_map = s->map + len;
/* Ruritanian map for input, CRT map for output, can be swapped */ if (opts && opts->map_dir == FF_TX_MAP_SCATTER) { for (int j = 0; j < m; j++) { for (int i = 0; i < n; i++) {
in_map[(i*m + j*n) % len] = j*n + i;
out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
}
}
} else { for (int j = 0; j < m; j++) { for (int i = 0; i < n; i++) {
in_map[j*n + i] = (i*m + j*n) % len;
out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
}
}
}
if (inv) { for (int i = 0; i < m; i++) { int *in = &in_map[i*n + 1]; /* Skip the DC */ for (int j = 0; j < ((n - 1) >> 1); j++)
FFSWAP(int, in[j], in[n - j - 2]);
}
}
int ff_tx_gen_inplace_map(AVTXContext *s, int len)
{ int *src_map, out_map_idx = 0;
if (!s->sub || !s->sub->map) return AVERROR(EINVAL);
if (!(s->map = av_mallocz(len*sizeof(*s->map)))) return AVERROR(ENOMEM);
src_map = s->sub->map;
/* The first coefficient is always already in-place */ for (int src = 1; src < s->len; src++) { int dst = src_map[src]; int found = 0;
if (dst <= src) continue;
/* This just checks if a closed loop has been encountered before, * and if so, skips it, since to fully permute a loop we must only
* enter it once. */ do { for (int j = 0; j < out_map_idx; j++) { if (dst == s->map[j]) {
found = 1; break;
}
}
dst = src_map[dst];
} while (dst != src && !found);
if (!found)
s->map[out_map_idx++] = src;
}
s->map[out_map_idx++] = 0;
return 0;
}
staticvoid parity_revtab_generator(int *revtab, int n, int inv, int offset, int is_dual, int dual_high, int len, int basis, int dual_stride, int inv_lookup)
{
len >>= 1;
if (len <= basis) { int k1, k2, stride, even_idx, odd_idx;
parity_revtab_generator(revtab, n, inv, offset,
0, 0, len >> 0, basis, dual_stride, inv_lookup);
parity_revtab_generator(revtab, n, inv, offset + (len >> 0),
1, 0, len >> 1, basis, dual_stride, inv_lookup);
parity_revtab_generator(revtab, n, inv, offset + (len >> 0) + (len >> 1),
1, 1, len >> 1, basis, dual_stride, inv_lookup);
}
int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv,
FFTXCodeletOptions *opts, int basis, int dual_stride)
{
basis >>= 1; if (len < basis) return AVERROR(EINVAL);
if (!(s->map = av_mallocz(len*sizeof(*s->map)))) return AVERROR(ENOMEM);
/* Nothing else needs to be reset, it gets overwritten if another
* ff_tx_init_subtx() call is made. */
s->nb_sub = 0;
s->opaque = NULL;
memset(s->fn, 0, sizeof(*s->fn));
}
av_cold void av_tx_uninit(AVTXContext **ctx)
{ if (!(*ctx)) return;
reset_ctx(*ctx, 1);
av_freep(ctx);
}
static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, constvoid *scale)
{ /* Can only handle one sample+type to one sample+type transforms */ if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type)) return AVERROR(EINVAL); return 0;
}
/* Null transform when the length is 1 */ staticvoid ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
{
memcpy(_out, _in, stride);
}
staticint get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
{ int prio = cd->prio; int max_factor = 0;
/* If the CPU has a SLOW flag, and the instruction is also flagged
* as being slow for such, reduce its priority */ for (int i = 0; i < FF_ARRAY_ELEMS(cpu_slow_penalties); i++) { if ((cpu_flags & cd->cpu_flags) & cpu_slow_penalties[i][0])
prio -= cpu_slow_penalties[i][1];
}
/* Codelets for specific lengths are generally faster */ if ((len == cd->min_len) && (len == cd->max_len))
prio += 64;
/* Forward-only or inverse-only transforms are generally better */ if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
prio += 64;
/* Larger factors are generally better */ for (int i = 0; i < TX_MAX_SUB; i++)
max_factor = FFMAX(cd->factors[i], max_factor); if (max_factor)
prio += 16*max_factor;
return prio;
}
typedefstruct FFTXLenDecomp { int len; int len2; int prio; const FFTXCodelet *cd;
} FFTXLenDecomp;
int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type, int len, int inv)
{ int nb_decomp = 0;
FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS]; int codelet_list_idx = codelet_list_num;
constint cpu_flags = av_get_cpu_flags();
/* Loop through all codelets in all codelet lists to find matches
* to the requirements */ while (codelet_list_idx--) { const FFTXCodelet * const * list = codelet_list[codelet_list_idx]; const FFTXCodelet *cd = NULL;
while ((cd = *list++)) { int fl = len; int skip = 0, prio; int factors_product = 1, factors_mod = 0;
if (nb_decomp >= TX_MAX_DECOMPOSITIONS) goto sort;
/* Check if the type matches */ if (cd->type != TX_TYPE_ANY && type != cd->type) continue;
/* Check if the CPU supports the required ISA */ if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
!(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask))) continue;
for (int i = 0; i < TX_MAX_FACTORS; i++) { if (!cd->factors[i] || (fl == 1)) break;
if (cd->factors[i] == TX_FACTOR_ANY) {
factors_mod++;
factors_product *= fl;
} elseif (!(fl % cd->factors[i])) {
factors_mod++; if (cd->factors[i] == 2) { int b = ff_ctz(fl);
fl >>= b;
factors_product <<= b;
} else { do {
fl /= cd->factors[i];
factors_product *= cd->factors[i];
} while (!(fl % cd->factors[i]));
}
}
}
/* Disqualify if factor requirements are not satisfied or if trivial */ if ((factors_mod < cd->nb_factors) || (len == factors_product)) continue;
if (av_gcd(factors_product, fl) != 1) continue;
/* Check if length is supported and factorization was successful */ if ((factors_product < cd->min_len) ||
(cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len))) continue;
/* Check for duplicates */ for (int i = 0; i < nb_decomp; i++) { if (factors_product == ld[i].len) { /* Update priority if new one is higher */ if (prio > ld[i].prio)
ld[i].prio = prio;
skip = 1; break;
}
}
for (int i = 0; i < nb_decomp; i++) { if (ld[i].cd->nb_factors > 1)
dst[i] = ld[i].len2; else
dst[i] = ld[i].len;
}
return nb_decomp;
}
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
{
s->map = av_malloc(s->len*sizeof(*s->map)); if (!s->map) return AVERROR(ENOMEM);
s->map[0] = 0; /* DC is always at the start */ if (s->inv) /* Reversing the ACs flips the transform direction */ for (int i = 1; i < s->len; i++)
s->map[i] = s->len - i; else for (int i = 1; i < s->len; i++)
s->map[i] = i;
staticvoid print_type(AVBPrint *bp, enum AVTXType type)
{
av_bprintf(bp, "%s",
type == TX_TYPE_ANY ? "any" :
type == AV_TX_FLOAT_FFT ? "fft_float" :
type == AV_TX_FLOAT_MDCT ? "mdct_float" :
type == AV_TX_FLOAT_RDFT ? "rdft_float" :
type == AV_TX_FLOAT_DCT_I ? "dctI_float" :
type == AV_TX_FLOAT_DST_I ? "dstI_float" :
type == AV_TX_DOUBLE_FFT ? "fft_double" :
type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
type == AV_TX_INT32_FFT ? "fft_int32" :
type == AV_TX_INT32_MDCT ? "mdct_int32" :
type == AV_TX_INT32_RDFT ? "rdft_int32" :
type == AV_TX_INT32_DCT_I ? "dctI_int32" :
type == AV_TX_INT32_DST_I ? "dstI_int32" : "unknown");
}
staticvoid print_cd_info(const FFTXCodelet *cd, int prio, int len, int print_prio, int log_level)
{
AVBPrint bp;
av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
av_bprintf(&bp, "%s - type: ", cd->name);
print_type(&bp, cd->type);
av_bprintf(&bp, ", len: "); if (!len) { if (cd->min_len != cd->max_len)
av_bprintf(&bp, "[%i, ", cd->min_len);
/* We want all factors to completely cover the length */ staticinlineint check_cd_factors(const FFTXCodelet *cd, int len)
{ int matches = 0, any_flag = 0;
for (int i = 0; i < TX_MAX_FACTORS; i++) { int factor = cd->factors[i];
if (factor == TX_FACTOR_ANY) {
any_flag = 1;
matches++; continue;
} elseif (len <= 1 || !factor) { break;
} elseif (factor == 2) { /* Fast path */ int bits_2 = ff_ctz(len); if (!bits_2) continue; /* Factor not supported */
len >>= bits_2;
matches++;
} else { int res = len % factor; if (res) continue; /* Factor not supported */
while (!res) {
len /= factor;
res = len % factor;
}
matches++;
}
}
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, constvoid *scale)
{ int ret = 0;
AVTXContext *sub = NULL;
TXCodeletMatch *cd_tmp, *cd_matches = NULL; unsignedint cd_matches_size = 0; int codelet_list_idx = codelet_list_num; int nb_cd_matches = 0; #if !CONFIG_SMALL
AVBPrint bp; #endif
/* We still accept functions marked with SLOW, even if the CPU is
* marked with the same flag, but we give them lower priority. */ constint cpu_flags = av_get_cpu_flags();
/* Flags the transform wants */
uint64_t req_flags = flags;
/* Flags the codelet may require to be present */
uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
AV_TX_REAL_TO_REAL |
AV_TX_REAL_TO_IMAGINARY |
FF_TX_PRESHUFFLE |
FF_TX_ASM_CALL;
/* Unaligned codelets are compatible with the aligned flag */ if (req_flags & FF_TX_ALIGNED)
req_flags |= AV_TX_UNALIGNED;
/* If either flag is set, both are okay, so don't check for an exact match */ if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE); if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
/* Loop through all codelets in all codelet lists to find matches
* to the requirements */ while (codelet_list_idx--) { const FFTXCodelet * const * list = codelet_list[codelet_list_idx]; const FFTXCodelet *cd = NULL;
while ((cd = *list++)) { /* Check if the type matches */ if (cd->type != TX_TYPE_ANY && type != cd->type) continue;
/* Check if the requested flags match from both sides */ if (((req_flags & cd->flags) != (req_flags)) ||
((inv_req_mask & cd->flags) != (req_flags & inv_req_mask))) continue;
/* Check if length is supported */ if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len))) continue;
/* Check if the CPU supports the required ISA */ if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
!(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask))) continue;
/* Check for factors */ if (!check_cd_factors(cd, len)) continue;
for (int i = 0; i < nb_cd_matches; i++) {
av_log(NULL, AV_LOG_TRACE, " %i: ", i + 1);
print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 0, 1, AV_LOG_TRACE);
} #endif
if (!s->sub) {
s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub)); if (!sub) {
ret = AVERROR(ENOMEM); goto end;
}
}
/* Attempt to initialize each */ for (int i = 0; i < nb_cd_matches; i++) { const FFTXCodelet *cd = cd_matches[i].cd;
AVTXContext *sctx = &s->sub[s->nb_sub];
ret = 0; if (cd->init)
ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
if (ret >= 0) { if (opts && opts->map_dir != FF_TX_MAP_NONE &&
sctx->map_dir == FF_TX_MAP_NONE) { /* If a specific map direction was requested, and it doesn't
* exist, create one.*/
sctx->map = av_malloc(len*sizeof(*sctx->map)); if (!sctx->map) {
ret = AVERROR(ENOMEM); goto end;
}
for (int i = 0; i < len; i++)
sctx->map[i] = i;
} elseif (opts && (opts->map_dir != sctx->map_dir)) { int *tmp = av_malloc(len*sizeof(*sctx->map)); if (!tmp) {
ret = AVERROR(ENOMEM); goto end;
}
memcpy(tmp, sctx->map, len*sizeof(*sctx->map));
for (int i = 0; i < len; i++)
sctx->map[tmp[i]] = i;
av_free(tmp);
}
s->nb_sub++; goto end;
}
s->fn[s->nb_sub] = NULL;
s->cd[s->nb_sub] = NULL;
reset_ctx(sctx, 0); if (ret == AVERROR(ENOMEM)) break;
}
if (!s->nb_sub)
av_freep(&s->sub);
end:
av_free(cd_matches); return ret;
}
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, constvoid *scale, uint64_t flags)
{ int ret;
AVTXContext tmp = { 0 }; constdouble default_scale_d = 1.0; constfloat default_scale_f = 1.0f;
if (!len || type >= AV_TX_NB || !ctx || !tx) return AVERROR(EINVAL);
if (!(flags & AV_TX_UNALIGNED))
flags |= FF_TX_ALIGNED; if (!(flags & AV_TX_INPLACE))
flags |= FF_TX_OUT_OF_PLACE;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.