// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
// First step of a k-means clustering with a fancy distance metric.
Status FastClusterHistograms(const std::vector<Histogram>& in,
size_t max_histograms, std::vector<Histogram>* out,
std::vector<uint32_t>* histogram_symbols) { const size_t prev_histograms = out->size();
out->reserve(max_histograms);
histogram_symbols->clear();
histogram_symbols->resize(in.size(), max_histograms);
std::vector<float> dists(in.size(), std::numeric_limits<float>::max());
size_t largest_idx = 0; for (size_t i = 0; i < in.size(); i++) { if (in[i].total_count_ == 0) {
(*histogram_symbols)[i] = 0;
dists[i] = 0.0f; continue;
}
HistogramEntropy(in[i]); if (in[i].total_count_ > in[largest_idx].total_count_) {
largest_idx = i;
}
}
if (prev_histograms > 0) { for (size_t j = 0; j < prev_histograms; ++j) {
HistogramEntropy((*out)[j]);
} for (size_t i = 0; i < in.size(); i++) { if (dists[i] == 0.0f) continue; for (size_t j = 0; j < prev_histograms; ++j) {
dists[i] = std::min(HistogramKLDivergence(in[i], (*out)[j]), dists[i]);
}
} auto max_dist = std::max_element(dists.begin(), dists.end()); if (*max_dist > 0.0f) {
largest_idx = max_dist - dists.begin();
}
}
constexpr float kMinDistanceForDistinct = 48.0f; while (out->size() < max_histograms) {
(*histogram_symbols)[largest_idx] = out->size();
out->push_back(in[largest_idx]);
dists[largest_idx] = 0.0f;
largest_idx = 0; for (size_t i = 0; i < in.size(); i++) { if (dists[i] == 0.0f) continue;
dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]); if (dists[i] > dists[largest_idx]) largest_idx = i;
} if (dists[largest_idx] < kMinDistanceForDistinct) break;
}
for (size_t i = 0; i < in.size(); i++) { if ((*histogram_symbols)[i] != max_histograms) continue;
size_t best = 0; float best_dist = std::numeric_limits<float>::max(); for (size_t j = 0; j < out->size(); j++) { float dist = j < prev_histograms ? HistogramKLDivergence(in[i], (*out)[j])
: HistogramDistance(in[i], (*out)[j]); if (dist < best_dist) {
best = j;
best_dist = dist;
}
}
JXL_ENSURE(best_dist < std::numeric_limits<float>::max()); if (best >= prev_histograms) {
(*out)[best].AddHistogram(in[i]);
HistogramEntropy((*out)[best]);
}
(*histogram_symbols)[i] = best;
} returntrue;
}
// Reorder histograms in *out so that the new symbols in *symbols come in // increasing order. void HistogramReindex(std::vector<Histogram>* out, size_t prev_histograms,
std::vector<uint32_t>* symbols) {
std::vector<Histogram> tmp(*out);
std::map<int, int> new_index; for (size_t i = 0; i < prev_histograms; ++i) {
new_index[i] = i;
} int next_index = prev_histograms; for (uint32_t symbol : *symbols) { if (new_index.find(symbol) == new_index.end()) {
new_index[symbol] = next_index;
(*out)[next_index] = tmp[symbol];
++next_index;
}
}
out->resize(next_index); for (uint32_t& symbol : *symbols) {
symbol = new_index[symbol];
}
}
} // namespace
// Clusters similar histograms in 'in' together, the selected histograms are // placed in 'out', and for each index in 'in', *histogram_symbols will // indicate which of the 'out' histograms is the best approximation.
Status ClusterHistograms(const HistogramParams& params, const std::vector<Histogram>& in,
size_t max_histograms, std::vector<Histogram>* out,
std::vector<uint32_t>* histogram_symbols) {
size_t prev_histograms = out->size();
max_histograms = std::min(max_histograms, params.max_histograms);
max_histograms = std::min(max_histograms, in.size()); if (params.clustering == HistogramParams::ClusteringType::kFastest) {
max_histograms = std::min(max_histograms, static_cast<size_t>(4));
}
JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(FastClusterHistograms)(
in, prev_histograms + max_histograms, out, histogram_symbols));
// Try to pair up clusters if doing so reduces the total cost.
struct HistogramPair { // validity of a pair: p.version == max(version[i], version[j]) float cost;
uint32_t first;
uint32_t second;
uint32_t version; // We use > because priority queues sort in *decreasing* order, but we // want lower cost elements to appear first. booloperator<(const HistogramPair& other) const { return std::make_tuple(cost, first, second, version) >
std::make_tuple(other.cost, other.first, other.second,
other.version);
}
};
// Create list of all pairs by increasing merging cost.
std::priority_queue<HistogramPair> pairs_to_merge; for (uint32_t i = 0; i < out->size(); i++) { for (uint32_t j = i + 1; j < out->size(); j++) {
Histogram histo;
histo.AddHistogram((*out)[i]);
histo.AddHistogram((*out)[j]);
JXL_ASSIGN_OR_RETURN(float cost, ANSPopulationCost(histo.data_.data(),
histo.data_.size()));
cost -= (*out)[i].entropy_ + (*out)[j].entropy_; // Avoid enqueueing pairs that are not advantageous to merge. if (cost >= 0) continue;
pairs_to_merge.push(
HistogramPair{cost, i, j, std::max(version[i], version[j])});
}
}
// Merge the best pair to merge, add new pairs that get formed as a // consequence. while (!pairs_to_merge.empty()) {
uint32_t first = pairs_to_merge.top().first;
uint32_t second = pairs_to_merge.top().second;
uint32_t ver = pairs_to_merge.top().version;
pairs_to_merge.pop(); if (ver != std::max(version[first], version[second]) ||
version[first] == 0 || version[second] == 0) { continue;
}
(*out)[first].AddHistogram((*out)[second]);
JXL_ASSIGN_OR_RETURN(float cost,
ANSPopulationCost((*out)[first].data_.data(),
(*out)[first].data_.size()));
(*out)[first].entropy_ = cost; for (uint32_t& item : renumbering) { if (item == second) {
item = first;
}
}
version[second] = 0;
version[first] = next_version++; for (uint32_t j = 0; j < out->size(); j++) { if (j == first) continue; if (version[j] == 0) continue;
Histogram histo;
histo.AddHistogram((*out)[first]);
histo.AddHistogram((*out)[j]);
JXL_ASSIGN_OR_RETURN(float cost, ANSPopulationCost(histo.data_.data(),
histo.data_.size()));
cost -= (*out)[first].entropy_ + (*out)[j].entropy_; // Avoid enqueueing pairs that are not advantageous to merge. if (cost >= 0) continue;
pairs_to_merge.push(
HistogramPair{cost, std::min(first, j), std::max(first, j),
std::max(version[first], version[j])});
}
}
std::vector<uint32_t> reverse_renumbering(out->size(), -1);
size_t num_alive = 0; for (size_t i = 0; i < out->size(); i++) { if (version[i] == 0) continue;
(*out)[num_alive++] = (*out)[i];
reverse_renumbering[i] = num_alive - 1;
}
out->resize(num_alive); for (uint32_t& item : *histogram_symbols) {
item = reverse_renumbering[renumbering[item]];
}
}
// Convert the context map to a canonical form.
HistogramReindex(out, prev_histograms, histogram_symbols); returntrue;
}
} // namespace jxl #endif// HWY_ONCE
Messung V0.5
¤ Dauer der Verarbeitung: 0.1 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.