//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ // Originally based on Chrome sources: // Copyright (c) 2010 The Chromium Authors. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Main store for SafeBrowsing protocol data. We store // known add/sub chunks, prefixes and completions in memory // during an update, and serialize to disk. // We do not store the add prefixes, those are retrieved by // decompressing the PrefixSet cache whenever we need to apply // an update. // // byte slicing: Many of the 4-byte values stored here are strongly // correlated in the upper bytes, and uncorrelated in the lower // bytes. Because zlib/DEFLATE requires match lengths of at least // 3 to achieve good compression, and we don't get those if only // the upper 16-bits are correlated, it is worthwhile to slice 32-bit // values into 4 1-byte slices and compress the slices individually. // The slices corresponding to MSBs will compress very well, and the // slice corresponding to LSB almost nothing. Because of this, we // only apply DEFLATE to the 3 most significant bytes, and store the // LSB uncompressed. // // byte sliced (numValues) data format: // uint32_t compressed-size // compressed-size bytes zlib DEFLATE data // 0...numValues byte MSB of 4-byte numValues data // uint32_t compressed-size // compressed-size bytes zlib DEFLATE data // 0...numValues byte 2nd byte of 4-byte numValues data // uint32_t compressed-size // compressed-size bytes zlib DEFLATE data // 0...numValues byte 3rd byte of 4-byte numValues data // 0...numValues byte LSB of 4-byte numValues data // // Store data format: // uint32_t magic // uint32_t version // uint32_t numAddChunks // uint32_t numSubChunks // uint32_t numAddPrefixes // uint32_t numSubPrefixes // uint32_t numAddCompletes // uint32_t numSubCompletes // 0...numAddChunks uint32_t addChunk // 0...numSubChunks uint32_t subChunk // byte sliced (numAddPrefixes) uint32_t add chunk of AddPrefixes // byte sliced (numSubPrefixes) uint32_t add chunk of SubPrefixes // byte sliced (numSubPrefixes) uint32_t sub chunk of SubPrefixes // byte sliced (numSubPrefixes) uint32_t SubPrefixes // byte sliced (numAddCompletes) uint32_t add chunk 
of AddCompletes // 0...numSubCompletes 32-byte Completions + uint32_t addChunk // + uint32_t subChunk // 16-byte MD5 of all preceding data
// Name of the SafeBrowsing store #define STORE_SUFFIX ".sbstore"
// Debug-only dump of the first and last 10 fixed-length prefixes.
// NOTE(review): the `aSize <= 4` guard looks like it should select the
// fixed-length (4-byte) prefix case -- confirm against the caller.
if (aSize <= 4 && LOG_ENABLED()) {
  // View the raw prefix bytes as 32-bit values. Borrow the string's own
  // buffer via BeginReading(); the previous ToNewCString() call allocated
  // a heap copy that was never freed (memory leak).
  const uint32_t* p =
      reinterpret_cast<const uint32_t*>(aPrefixes.BeginReading());

  // Number of aSize-byte prefixes in the buffer. This was previously used
  // without a declaration in the visible code; derived here the same way
  // as the total logged below.
  const int numOfPrefixes = static_cast<int>(aPrefixes.Length() / aSize);

  // Dump the first/last 10 fixed-length prefixes for debugging.
  LOG(("* The first 10 (maximum) fixed-length prefixes: "));
  for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
    const uint8_t* c = reinterpret_cast<const uint8_t*>(&p[i]);
    LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
  }

  LOG(("* The last 10 (maximum) fixed-length prefixes: "));
  for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
    const uint8_t* c = reinterpret_cast<const uint8_t*>(&p[i]);
    LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
  }

  LOG(("---- %zu fixed-length prefixes in total.",
       aPrefixes.Length() / aSize));
}
// Verify the MD5 hash stored at the end of the on-disk store against a
// freshly computed hash of the preceding data. Returns NS_OK when there
// is nothing to check (empty store), an error on mismatch/short file.
// (NOTE: this definition is truncated in this chunk -- the comparison and
// closing brace are not visible here.)
nsresult HashStore::CheckChecksum(uint32_t aFileSize) {
  if (!mInputStream) {
    // BeginUpdate was called on an empty store; nothing to verify.
    return NS_OK;
  }

  // Check for file corruption by
  // comparing the stored checksum to actual checksum of data
  nsAutoCString hash;
  nsAutoCString compareHash;
  uint32_t read;

  // NOTE(review): `hash` is never populated before its length is used
  // below -- the call that computes the checksum appears to have been
  // lost from this chunk; confirm against upstream.
  if (hash.Length() > aFileSize) {
    NS_WARNING("SafeBrowsing file not long enough to store its hash");
    return NS_ERROR_FAILURE;
  }

  nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
  // Seek to where the trailing checksum starts. Fixed: `rv` was used here
  // without a declaration anywhere in the visible function body.
  nsresult rv =
      seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length());
  NS_ENSURE_SUCCESS(rv, rv);
// Validate the store header: the magic number and version must match what
// this code writes. Passing aVersion == 0 checks against CURRENT_VERSION.
// (NOTE: this definition is truncated in this chunk -- the success return
// and closing brace are not visible here.)
nsresult HashStore::SanityCheck(uint32_t aVersion) const {
  const uint32_t VER = aVersion == 0 ? CURRENT_VERSION : aVersion;
  if (mHeader.magic != STORE_MAGIC || mHeader.version != VER) {
    NS_WARNING("Unexpected header data in the store.");
    // Version mismatch is also considered file corrupted,
    // We need this error code to know if we should remove the file.
    return NS_ERROR_FILE_CORRUPTED;
  }
// -- Fragment of the checksum computation (the enclosing function's
// signature is not visible in this chunk; `rv`, `hash` -- presumably an
// nsICryptoHash -- and `aChecksumPresent` come from the lost portion). --

// Size of MD5 hash in bytes
const uint32_t CHECKSUM_SIZE = 16;

// MD5 is not a secure hash function, but since this is a filesystem integrity
// check, this usage is ok.
rv = hash->Init(nsICryptoHash::MD5);
NS_ENSURE_SUCCESS(rv, rv);

if (!aChecksumPresent) {
  // Hash entire file
  rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
} else {
  // Hash everything but last checksum bytes
  if (aFileSize < CHECKSUM_SIZE) {
    NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
    return NS_ERROR_FAILURE;
  }
  rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
}
NS_ENSURE_SUCCESS(rv, rv);
// -- Tail of the chunk-number reader (the opening of the enclosing
// function, including the declaration of `rv`, is not visible in this
// chunk). Reads the add/sub chunk-number arrays whose element counts come
// from the store header read earlier. --
rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
NS_ENSURE_SUCCESS(rv, rv);
NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks,
             "Read the right amount of add chunks.");

rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
NS_ENSURE_SUCCESS(rv, rv);
NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks,
             "Read the right amount of sub chunks.");

return NS_OK;
}
// Read the prefix/completion payload of the store into memory.
// (NOTE: this definition is truncated in this chunk -- only the
// empty-store early return is visible here.)
nsresult HashStore::ReadHashes() {
  if (!mInputStream) {
    // BeginUpdate has been called but Open hasn't initialized mInputStream,
    // because the existing HashStore is empty.
    return NS_OK;
  }
// Prepare the store for an incoming update: verify the on-disk data, load
// the remainder of the store into memory, then release the input stream
// since the file will be rewritten.
// (NOTE: this definition is truncated in this chunk -- the rest of the
// function is not visible here.)
nsresult HashStore::BeginUpdate() {
  // Check whether the file is corrupted and read the rest of the store
  // in memory.
  nsresult rv = PrepareForUpdate();
  NS_ENSURE_SUCCESS(rv, rv);

  // Close input stream, won't be needed any more and
  // we will rewrite ourselves.
  if (mInputStream) {
    rv = mInputStream->Close();
    NS_ENSURE_SUCCESS(rv, rv);
  }
// -- Body fragment of the templated merge helper (the enclosing
// function's signature is not visible in this chunk). Merges sorted
// update prefixes into the sorted store array: entries from chunks we
// already have are skipped (unless merging completions), duplicates are
// skipped, and the surviving entries are appended in one batch. --
auto storeIter = aStorePrefixes->begin();
auto storeEnd = aStorePrefixes->end();

// use a separate array so we can keep the iterators valid
// if the nsTArray grows
nsTArray<T> adds;
// Fixed: `constauto` (missing space) would not compile.
for (const auto& updatePrefix : aUpdatePrefixes) {
  // skip this chunk if we already have it, unless we're
  // merging completions, in which case we'll always already
  // have the chunk from the original prefix
  if (aStoreChunks->Has(updatePrefix.Chunk()))
    if (!aAllowMerging) continue;
  // XXX: binary search for insertion point might be faster in common
  // case?
  while (storeIter < storeEnd && (storeIter->Compare(updatePrefix) < 0)) {
    // skip forward to matching element (or not...)
    storeIter++;
  }
  // no match, add
  if (storeIter == storeEnd || storeIter->Compare(updatePrefix) != 0) {
    if (!adds.AppendElement(updatePrefix, fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }
}

// Chunks can be empty, but we should still report we have them
// to make the chunkranges continuous.
aStoreChunks->Merge(aUpdateChunks);
if (!aStorePrefixes->AppendElements(adds, fallible))
  return NS_ERROR_OUT_OF_MEMORY;
// -- Tail of the sub-prefix reader (the opening of the enclosing
// function, including the declarations of `count`, `addchunks`,
// `subchunks` and `prefixes`, is not visible in this chunk). Reassembles
// SubPrefix records from the parallel arrays read from disk. --
if (!mSubPrefixes.SetCapacity(count, fallible)) {
  return NS_ERROR_OUT_OF_MEMORY;
}
for (uint32_t i = 0; i < count; i++) {
  // Capacity was reserved above, so this append should not fail.
  SubPrefix* sub = mSubPrefixes.AppendElement(fallible);
  sub->addChunk = addchunks[i];
  sub->prefix.FromUint32(prefixes[i]);
  sub->subChunk = subchunks[i];
}

return NS_OK;
}
// Split up PrefixArray back into the constituents
// (NOTE: this chunk fuses two partially lost functions. The body of
// WriteAddPrefixChunks below is missing its write/return tail, and the
// second loop -- which appends from mSubPrefixes into `addchunks`,
// `prefixes` and `subchunks` -- belongs to a different serializer whose
// declarations are not visible here. Confirm against upstream.)
nsresult HashStore::WriteAddPrefixChunks(nsIOutputStream* aOut) {
  nsTArray<uint32_t> chunks;
  uint32_t count = mAddPrefixes.Length();
  if (!chunks.SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  for (uint32_t i = 0; i < count; i++) {
    chunks.AppendElement(mAddPrefixes[i].Chunk());
  }

  for (uint32_t i = 0; i < count; i++) {
    addchunks.AppendElement(mSubPrefixes[i].AddChunk());
    prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
    subchunks.AppendElement(mSubPrefixes[i].Chunk());
  }
// Serialize the in-memory store back to disk.
// (NOTE: this definition is truncated in this chunk -- only the entry
// checks are visible here.)
nsresult HashStore::WriteFile() {
  NS_ASSERTION(mInUpdate, "Must be in update to write database.");
  if (nsUrlClassifierDBService::ShutdownHasStarted()) {
    // Don't touch the file while the service is tearing down.
    return NS_ERROR_ABORT;
  }
// Find items matching between |subs| and |adds|, and remove them, // recording the item from |adds| in |adds_removed|. To minimize // copies, the inputs are processing in parallel, so |subs| and |adds| // should be compatibly ordered (either by SBAddPrefixLess or // SBAddPrefixHashLess). // // |predAS| provides add < sub, |predSA| provides sub < add, for the // tightest compare appropriate (see calls in SBProcessSubs). template <class TSub, class TAdd> staticvoid KnockoutSubs(FallibleTArray<TSub>* aSubs,
FallibleTArray<TAdd>* aAdds) { // Keep a pair of output iterators for writing kept items. Due to // deletions, these may lag the main iterators. Using erase() on // individual items would result in O(N^2) copies. Using a list // would work around that, at double or triple the memory cost. auto addOut = aAdds->begin(); auto addIter = aAdds->begin();
auto subOut = aSubs->begin(); auto subIter = aSubs->begin();
auto addEnd = aAdds->end(); auto subEnd = aSubs->end();
while (addIter != addEnd && subIter != subEnd) { // additer compare, so it compares on add chunk
int32_t cmp = addIter->Compare(*subIter); if (cmp > 0) { // If |*sub_iter| < |*add_iter|, retain the sub.
*subOut = *subIter;
++subOut;
++subIter;
} elseif (cmp < 0) { // If |*add_iter| < |*sub_iter|, retain the add.
*addOut = *addIter;
++addOut;
++addIter;
} else { // Drop equal items
++addIter;
++subIter;
}
}
// Remove items in |removes| from |fullHashes|. |fullHashes| and // |removes| should be ordered by SBAddPrefix component. template <class T> staticvoid RemoveMatchingPrefixes(const SubPrefixArray& aSubs,
FallibleTArray<T>* aFullHashes) { // Where to store kept items. auto out = aFullHashes->begin(); auto hashIter = aFullHashes->begin(); auto hashEnd = aFullHashes->end();
auto removeIter = aSubs.begin(); auto removeEnd = aSubs.end();
while (hashIter != hashEnd && removeIter != removeEnd) {
int32_t cmp = removeIter->CompareAlt(*hashIter); if (cmp > 0) { // Keep items less than |*removeIter|.
*out = *hashIter;
++out;
++hashIter;
} elseif (cmp < 0) { // No hit for |*removeIter|, bump it forward.
++removeIter;
} else { // Drop equal items, there may be multiple hits. do {
++hashIter;
} while (hashIter != hashEnd && !(removeIter->CompareAlt(*hashIter) < 0));
++removeIter;
}
}
Erase(aFullHashes, out, hashIter);
}
staticvoid RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks) { auto subIter = aSubs.begin();
for (constauto& sub : aSubs) { bool hasChunk = aAddChunks.Has(sub.AddChunk()); // Keep the subprefix if the chunk it refers to is one // we haven't seen it yet. if (!hasChunk) {
*subIter = sub;
subIter++;
}
}
#ifdef DEBUG
// Debug-only check that |aArray| is sorted according to its own Compare
// ordering.
// (NOTE: this definition is truncated in this chunk -- only the iterator
// setup is visible here.)
// Fixed: `staticvoid` (missing space) would not compile.
template <class T>
static void EnsureSorted(FallibleTArray<T>* aArray) {
  auto start = aArray->begin();
  auto end = aArray->end();
  auto iter = start;
  auto previous = start;
// -- Tail of the sub-processing routine (the opening of the enclosing
// function is not visible in this chunk). Applies the remaining sub data
// to the add data and, in debug builds, verifies the sort order of all
// four arrays. --

// Remove any remaining subbed prefixes from both addprefixes
// and addcompletes.
KnockoutSubs(&mSubPrefixes, &mAddPrefixes);
KnockoutSubs(&mSubCompletes, &mAddCompletes);

// Remove any remaining subprefixes referring to addchunks that
// we have (and hence have been processed above).
RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks);

#ifdef DEBUG
EnsureSorted(&mAddPrefixes);
EnsureSorted(&mSubPrefixes);
EnsureSorted(&mAddCompletes);
EnsureSorted(&mSubCompletes);
LOG(("All databases seem to have a consistent sort order."));
#endif

return NS_OK;
}
// Re-attach the prefix/completion payload (kept outside the store, in the
// PrefixSet cache) to the in-memory add arrays. |aPrefixes| and
// |aCompletes| must match the store's add prefix/complete counts exactly;
// a mismatch means cache and store are out of sync and is an error.
nsresult HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes,
                                const nsTArray<nsCString>& aCompletes) {
  if (aPrefixes.Length() != mAddPrefixes.Length() ||
      aCompletes.Length() != mAddCompletes.Length()) {
    // Fixed: the previous backslash-continued string literal embedded the
    // continuation line's leading whitespace in the log message; adjacent
    // string literals produce the intended single-line message.
    LOG((
        "Amount of prefixes/completes in cache not consistent with store "
        "prefixes(%zu vs %zu), store completes(%zu vs %zu)",
        aPrefixes.Length(), mAddPrefixes.Length(), aCompletes.Length(),
        mAddCompletes.Length()));
    return NS_ERROR_FAILURE;
  }

  // Counts match one-to-one, so restore each record's payload by index.
  for (size_t i = 0; i < mAddPrefixes.Length(); i++) {
    mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]);
  }

  for (size_t i = 0; i < mAddCompletes.Length(); i++) {
    mAddCompletes[i].complete.Assign(aCompletes[i]);
  }

  // Fixed: the function's success return and closing brace were missing;
  // the file's tail had been replaced by unrelated extraction junk (a
  // German website disclaimer), removed here.
  return NS_OK;
}