/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ // Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file.
// Histogram is an object that aggregates statistics, and can summarize them in // various forms, including ASCII graphical, HTML, and numerically (as a // vector of numbers corresponding to each of the aggregating buckets). // See header file for details and examples.
//------------------------------------------------------------------------------ // Methods for the validating a sample and a related histogram. //------------------------------------------------------------------------------
Histogram::Inconsistencies Histogram::FindCorruption( const SampleSet& snapshot) const { int inconsistencies = NO_INCONSISTENCIES;
Sample previous_range = -1; // Bottom range is always 0.
int64_t count = 0; for (size_t index = 0; index < bucket_count(); ++index) {
count += snapshot.counts(index); int new_range = ranges(index); if (previous_range >= new_range) inconsistencies |= BUCKET_ORDER_ERROR;
previous_range = new_range;
}
if (!HasValidRangeChecksum()) inconsistencies |= RANGE_CHECKSUM_ERROR;
int64_t delta64 = snapshot.redundant_count() - count; if (delta64 != 0) { int delta = static_cast<int>(delta64); if (delta != delta64) delta = INT_MAX; // Flag all giant errors as INT_MAX. // Since snapshots of histograms are taken asynchronously relative to // sampling (and snapped from different threads), it is pretty likely that // we'll catch a redundant count that doesn't match the sample count. We // allow for a certain amount of slop before flagging this as an // inconsistency. Even with an inconsistency, we'll snapshot it again (for // UMA in about a half hour, so we'll eventually get the data, if it was // not the result of a corruption. If histograms show that 1 is "too tight" // then we may try to use 2 or 3 for this slop value. constint kCommonRaceBasedCountMismatch = 1; if (delta > 0) { if (delta > kCommonRaceBasedCountMismatch)
inconsistencies |= COUNT_HIGH_ERROR;
} else {
DCHECK_GT(0, delta); if (-delta > kCommonRaceBasedCountMismatch)
inconsistencies |= COUNT_LOW_ERROR;
}
} returnstatic_cast<Inconsistencies>(inconsistencies);
}
size_t Histogram::BucketIndex(Sample value) const { // Use simple binary search. This is very general, but there are better // approaches if we knew that the buckets were linearly distributed.
DCHECK_LE(ranges(0), value);
DCHECK_GT(ranges(bucket_count()), value);
size_t under = 0;
size_t over = bucket_count();
size_t mid;
do {
DCHECK_GE(over, under);
mid = under + (over - under) / 2; if (mid == under) break; if (ranges(mid) <= value)
under = mid; else
over = mid;
} while (true);
// Use the actual bucket widths (like a linear histogram) until the widths get // over some transition value, and then use that transition width. Exponentials // get so big so fast (and we don't expect to see a lot of entries in the large // buckets), so we need this to make it possible to see what is going on and // not have 0-graphical-height buckets. double Histogram::GetBucketSize(Count current, size_t i) const {
DCHECK_GT(ranges(i + 1), ranges(i)); staticconstdouble kTransitionWidth = 5; double denominator = ranges(i + 1) - ranges(i); if (denominator > kTransitionWidth)
denominator = kTransitionWidth; // Stop trying to normalize. return current / denominator;
}
// We generate the CRC-32 using the low order bits to select whether to XOR in // the reversed polynomial 0xedb88320L. This is nice and simple, and allows us // to keep the quotient in a uint32_t. Since we're not concerned about the // nature of corruptions (i.e., we don't care about bit sequencing, since we are // handling memory changes, which are more grotesque) so we don't bother to // get the CRC correct for big-endian vs little-ending calculations. All we // need is a nice hash, that tends to depend on all the bits of the sample, with // very little chance of changes in one place impacting changes in another // place.
uint32_t Histogram::Crc32(uint32_t sum, Histogram::Sample range) { constbool kUseRealCrc = true; // TODO(jar): Switch to false and watch stats. if (kUseRealCrc) { union {
Histogram::Sample range; unsignedchar bytes[sizeof(Histogram::Sample)];
} converter;
converter.range = range; for (size_t i = 0; i < sizeof(converter); ++i)
sum = kCrcTable[(sum & 0xff) ^ converter.bytes[i]] ^ (sum >> 8);
} else { // Use hash techniques provided in ReallyFastHash, except we don't care // about "avalanching" (which would worsten the hash, and add collisions), // and we don't care about edge cases since we have an even number of bytes. union {
Histogram::Sample range;
uint16_t ints[sizeof(Histogram::Sample) / 2];
} converter;
DCHECK_EQ(sizeof(Histogram::Sample), sizeof(converter));
converter.range = range;
sum += converter.ints[0];
sum = (sum << 16) ^ sum ^ (static_cast<uint32_t>(converter.ints[1]) << 11);
sum += sum >> 11;
} return sum;
}
double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const { double max = 0; for (size_t i = 0; i < bucket_count(); ++i) { double current_size = GetBucketSize(snapshot.counts(i), i); if (current_size > max) max = current_size;
} return max;
}
//------------------------------------------------------------------------------ // Methods for the Histogram::SampleSet class //------------------------------------------------------------------------------
Count Histogram::SampleSet::TotalCount() const {
Count total = 0; for (Counts::const_iterator it = counts_.begin(); it != counts_.end(); ++it) {
total += *it;
} return total;
}
void Histogram::SampleSet::Add(const SampleSet& other) {
DCHECK_EQ(counts_.Length(), other.counts_.Length());
sum_ += other.sum_;
redundant_count_ += other.redundant_count_; for (size_t index = 0; index < counts_.Length(); ++index)
counts_[index] += other.counts_[index];
}
//------------------------------------------------------------------------------ // LinearHistogram: This histogram uses a traditional set of evenly spaced // buckets. //------------------------------------------------------------------------------
double LinearHistogram::GetBucketSize(Count current, size_t i) const {
DCHECK_GT(ranges(i + 1), ranges(i)); // Adjacent buckets with different widths would have "surprisingly" many (few) // samples in a histogram if we didn't normalize this way. double denominator = ranges(i + 1) - ranges(i); return current / denominator;
}
const std::string LinearHistogram::GetAsciiBucketRange(size_t i) const { int range = ranges(i);
BucketDescriptionMap::const_iterator it = bucket_description_.find(range); if (it == bucket_description_.end()) return Histogram::GetAsciiBucketRange(i); return it->second;
}
//------------------------------------------------------------------------------ // This section provides implementation for BooleanHistogram. //------------------------------------------------------------------------------
void BooleanHistogram::Accumulate(Sample value, Count count, size_t index) { // Callers will have computed index based on the non-booleanified value. // So we need to adjust the index manually.
LinearHistogram::Accumulate(!!value, count, value ? 1 : 0);
}
void FlagHistogram::AddSampleSet(const SampleSet& sample) {
DCHECK_EQ(bucket_count(), sample.size()); // We can't be sure the SampleSet provided came from another FlagHistogram, // so we take the following steps: // - If our flag has already been set do nothing. // - Set our flag if the following hold: // - The sum of the counts in the provided SampleSet is 1. // - The bucket index for that single value is the same as the index // where we // would place our set flag. // - Otherwise, take no action.
void CountHistogram::AddSampleSet(const SampleSet& sample) {
DCHECK_EQ(bucket_count(), sample.size()); // We can't be sure the SampleSet provided came from another CountHistogram, // so we at least check that the unused buckets are empty.
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.