// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses.
*/
for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < curr); /* note : all candidates are now supposed sorted, * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
if ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|| (curr < dictLimit) /* both in extDict */) { const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit)) ?
base : dictBase;
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|| (curr < dictLimit) );
match = mBase + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* preparation for next read of match[matchLength] */
}
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
curr, matchIndex, (U32)matchLength);
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
}
if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ /* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
matchIndex, btLow, nextPtr[1]);
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
} else { /* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
matchIndex, btLow, nextPtr[0]);
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match = dictBase + dictMatchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (dictMatchIndex+matchLength >= dictHighLimit)
match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
if (matchLength > bestLength) {
U32 matchIndex = dictMatchIndex + dictIndexDelta; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
} if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
if (match[matchLength] < ip[matchLength]) { if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else { /* match is larger than current */ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
commonLengthLarger = matchLength;
dictMatchIndex = nextPtr[0];
}
}
if (bestLength >= MINMATCH) {
U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
} return bestLength;
for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match;
if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) {
nbCompares = 0; /* in addition to avoiding checking any * further in this loop, make sure we
* skip checking in the dictionary. */
} break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
if (match[matchLength] < ip[matchLength]) { /* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else { /* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
/* We know the hashtable is oversized by a factor of `bucketSize`. * We are going to temporarily pretend `bucketSize == 1`, keeping only a * single entry. We will use the rest of the space to construct a temporary * chaintable.
*/
U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
U32* const tmpHashTable = hashTable;
U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
U32 hashIdx;
/* fill conventional hash table and conventional chain table */ for ( ; idx < target; idx++) {
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); if (idx >= tmpMinChain) {
tmpChainTable[idx - tmpMinChain] = hashTable[h];
}
tmpHashTable[h] = idx;
}
/* sort chains into ddss chain table */
{
U32 chainPos = 0; for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
U32 count;
U32 countBeyondMinChain = 0;
U32 i = tmpHashTable[hashIdx]; for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { /* skip through the chain to the first position that won't be
* in the hash cache bucket */ if (i < minChain) {
countBeyondMinChain++;
}
i = tmpChainTable[i - tmpMinChain];
} if (count == cacheSize) { for (count = 0; count < chainLimit;) { if (i < minChain) { if (!i || ++countBeyondMinChain > cacheSize) { /* only allow pulling `cacheSize` number of entries * into the cache or chainTable beyond `minChain`, * to replace the entries pulled out of the * chainTable into the cache. This lets us reach * back further without increasing the total number * of entries in the chainTable, guaranteeing the * DDSS chain table will fit into the space
* allocated for the regular one. */ break;
}
}
chainTable[chainPos++] = i;
count++; if (i < tmpMinChain) { break;
}
i = tmpChainTable[i - tmpMinChain];
}
} else {
count = 0;
} if (count) {
tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
} else {
tmpHashTable[hashIdx] = 0;
}
}
assert(chainPos <= chainSize); /* I believe this is guaranteed... */
}
/* move chain pointers into the last entry of each hash bucket */ for (hashIdx = (1 << hashLog); hashIdx; ) {
U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
U32 const chainPackedPointer = tmpHashTable[hashIdx];
U32 i; for (i = 0; i < cacheSize; i++) {
hashTable[bucketIdx + i] = 0;
}
hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
}
/* fill the buckets of the hash table */ for (idx = ms->nextToUpdate; idx < target; idx++) {
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
<< ZSTD_LAZY_DDSS_BUCKET_LOG;
U32 i; /* Shift hash cache down 1. */ for (i = cacheSize - 1; i; i--)
hashTable[h + i] = hashTable[h + i - 1];
hashTable[h] = idx;
}
ms->nextToUpdate = target;
}
/* Returns the longest match length found in the dedicated dict search structure. * If none are longer than the argument ml, then ml will be returned.
*/
FORCE_INLINE_TEMPLATE
size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, const ZSTD_MatchState_t* const dms, const BYTE* const ip, const BYTE* const iLimit, const BYTE* const prefixStart, const U32 curr, const U32 dictLimit, const size_t ddsIdx) { const U32 ddsLowestIndex = dms->window.dictLimit; const BYTE* const ddsBase = dms->window.base; const BYTE* const ddsEnd = dms->window.nextSrc; const U32 ddsSize = (U32)(ddsEnd - ddsBase); const U32 ddsIndexDelta = dictLimit - ddsSize; const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
U32 ddsAttempt;
U32 matchIndex;
/* save best solution */ if (currentMl > ml) {
ml = currentMl;
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); if (ip+currentMl == iLimit) { /* best possible, avoids read overflow on next attempt */ return ml;
}
}
}
while(idx < target) { /* catch up */
size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++; /* Stop inserting every position when in the lazy skipping mode. */ if (lazySkipping) break;
}
/* HC4 match finder */
matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex;
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else { const BYTE* const match = dictBase + matchIndex;
assert(match+4 <= dictEnd); if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
/* save best solution */ if (currentMl > ml) {
ml = currentMl;
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
if (matchIndex <= minChain) break;
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
/* ********************************* * (SIMD) Row-based matchfinder
***********************************/ /* Constants for row-based hash */ #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */
/* ZSTD_VecMask_next(): * Starting from the LSB, returns the idx of the next non-zero bit. * Basically counting the nb of trailing zeroes.
*/
MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { return ZSTD_countTrailingZeros64(val);
}
/* ZSTD_row_nextIndex(): * Returns the next index to insert at within a tagTable row, and updates the "head" * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
U32 next = (*tagRow-1) & rowMask;
next += (next == 0) ? rowMask : 0; /* skip first position */
*tagRow = (BYTE)next; return next;
}
/* ZSTD_isAligned(): * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
*/
MEM_STATIC int ZSTD_isAligned(voidconst* ptr, size_t align) {
assert((align & (align - 1)) == 0); return (((size_t)ptr) & (align - 1)) == 0;
}
/* ZSTD_row_prefetch(): * Performs prefetching for the hashTable and tagTable at a given row.
*/
FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
PREFETCH_L1(hashTable + relRow); if (rowLog >= 5) {
PREFETCH_L1(hashTable + relRow + 16); /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */
}
PREFETCH_L1(tagTable + relRow); if (rowLog == 6) {
PREFETCH_L1(tagTable + relRow + 32);
}
assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */
assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */
}
/* ZSTD_row_update_internal(): * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. * Skips sections of long matches as is necessary.
*/
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
{
U32 idx = ms->nextToUpdate; const BYTE* const base = ms->window.base; const U32 target = (U32)(ip - base); const U32 kSkipThreshold = 384; const U32 kMaxMatchStartPositionsToUpdate = 96; const U32 kMaxMatchEndPositionsToUpdate = 32;
if (useCache) { /* Only skip positions when using hash cache, i.e. * if we are loading a dict, don't skip anything. * If we decide to skip, then we only update a set number * of positions at the beginning and end of the match.
*/ if (UNLIKELY(target - idx > kSkipThreshold)) {
U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
idx = target - kMaxMatchEndPositionsToUpdate;
ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
}
}
assert(target >= idx);
ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
ms->nextToUpdate = target;
}
/* ZSTD_row_update(): * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary * processing.
*/ void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) { const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); const U32 rowMask = (1u << rowLog) - 1; const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
/* Returns the mask width of bits group of which will be set to 1. Given not all * architectures have easy movemask instruction, this helps to iterate over * groups of bits easier and faster.
*/
FORCE_INLINE_TEMPLATE U32
ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
{
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
(void)rowEntries; #ifdefined(ZSTD_ARCH_ARM_NEON) /* NEON path only works for little endian */ if (!MEM_isLittleEndian()) { return 1;
} if (rowEntries == 16) { return 4;
} if (rowEntries == 32) { return 2;
} if (rowEntries == 64) { return 1;
} #endif return 1;
}
/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag" * matches the hash at the nth position in a row of the tagTable. * Each row is a circular buffer beginning at the value of "headGrouped". So we * must rotate the "matches" bitfield to match up with the actual layout of the
* entries within the hashTable */
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
{ const BYTE* const src = tagRow;
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
# ifdefined(ZSTD_ARCH_ARM_NEON) /* This NEON path only works for little endian - otherwise use SWAR below */ if (MEM_isLittleEndian()) { return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
} # endif /* ZSTD_ARCH_ARM_NEON */ /* SWAR */
{ constint chunkSize = sizeof(size_t); const size_t shiftAmount = ((chunkSize * 8) - chunkSize); const size_t xFF = ~((size_t)0); const size_t x01 = xFF / 0xFF; const size_t x80 = x01 << 7; const size_t splatChar = tag * x01;
ZSTD_VecMask matches = 0; int i = rowEntries - chunkSize;
assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8)); if (MEM_isLittleEndian()) { /* runtime check so have two loops */ const size_t extractMagic = (xFF / 0x7F) >> chunkSize; do {
size_t chunk = MEM_readST(&src[i]);
chunk ^= splatChar;
chunk = (((chunk | x80) - x01) | chunk) & x80;
matches <<= chunkSize;
matches |= (chunk * extractMagic) >> shiftAmount;
i -= chunkSize;
} while (i >= 0);
} else { /* big endian: reverse bits during extraction */ const size_t msb = xFF ^ (xFF >> 1); const size_t extractMagic = (msb / 0x1FF) | msb; do {
size_t chunk = MEM_readST(&src[i]);
chunk ^= splatChar;
chunk = (((chunk | x80) - x01) | chunk) & x80;
matches <<= chunkSize;
matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
i -= chunkSize;
} while (i >= 0);
}
matches = ~matches; if (rowEntries == 16) { return ZSTD_rotateRight_U16((U16)matches, headGrouped);
} elseif (rowEntries == 32) { return ZSTD_rotateRight_U32((U32)matches, headGrouped);
} else { return ZSTD_rotateRight_U64((U64)matches, headGrouped);
}
} #endif
}
/* The high-level approach of the SIMD row based match finder is as follows: * - Figure out where to insert the new entry: * - Generate a hash for current input position and split it into a one byte of tag and `rowHashLog` bits of index. * - The hash is salted by a value that changes on every context reset, so when the same table is used * we will avoid collisions that would otherwise slow us down by introducing phantom matches. * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines * which row to insert into. * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes * per row). * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and * generate a bitfield that we can cycle through to check the collisions in the hash table. * - Pick the longest match. * - Insert the tag into the equivalent row and position in the tagTable.
*/
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_RowFindBestMatch(
ZSTD_MatchState_t* ms, const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr, const U32 mls, const ZSTD_dictMode_e dictMode, const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
BYTE* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache; const U32 hashLog = ms->rowHashLog; const ZSTD_compressionParameters* const cParams = &ms->cParams; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const U32 curr = (U32)(ip-base); const U32 maxDistance = 1U << cParams->windowLog; const U32 lowestValid = ms->window.lowLimit; const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; const U32 isDictionary = (ms->loadedDictEnd != 0); const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; const U32 rowEntries = (1U << rowLog); const U32 rowMask = rowEntries - 1; const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries); const U64 hashSalt = ms->hashSalt;
U32 nbAttempts = 1U << cappedSearchLog;
size_t ml=4-1;
U32 hash;
/* DMS/DDS variables that may be referenced laster */ const ZSTD_MatchState_t* const dms = ms->dictMatchState;
/* Initialize the following variables to satisfy static analyzer */
size_t ddsIdx = 0;
U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
U32 dmsTag = 0;
U32* dmsRow = NULL;
BYTE* dmsTagRow = NULL;
/* Update the hashTable and tagTable up to (but not including) ip */ if (!ms->lazySkipping) {
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
} else { /* Stop inserting every position when in the lazy skipping mode. * The hash cache is also not kept up to date in this mode.
*/
hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
ms->nextToUpdate = curr;
}
ms->hashSaltEntropy += hash; /* collect salt entropy */
/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
tagRow[pos] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}
/* Return the longest match */ for (; currMatch < numMatches; ++currMatch) {
U32 const matchIndex = matchBuffer[currMatch];
size_t currentMl=0;
assert(matchIndex < curr);
assert(matchIndex >= lowLimit);
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex;
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else { const BYTE* const match = dictBase + matchIndex;
assert(match+4 <= dictEnd); if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
/* Save best solution */ if (currentMl > ml) {
ml = currentMl;
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
}
}
/* * Generate search functions templated on (dictMode, mls, rowLog). * These functions are outlined for code size & compilation time. * ZSTD_searchMax() dispatches to the correct implementation function. * * TODO: The start of the search function involves loading and calculating a * bunch of constants from the ZSTD_MatchState_t. These computations could be * done in an initialization function, and saved somewhere in the match state. * Then we could pass a pointer to the saved state instead of the match state, * and avoid duplicate computations. * * TODO: Move the match re-winding into searchMax. This improves compression * ratio, and unlocks further simplifications with the next TODO. * * TODO: Try moving the repcode search into searchMax. After the re-winding * and repcode search are in searchMax, there is no more logic in the match * finder loop that requires knowledge about the dictMode. So we should be * able to avoid force inlining it, and we can join the extDict loop with * the single segment loop. It should go in searchMax instead of its own * function to avoid having multiple virtual function calls per search.
*/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.