Quelle algorithms.rs

Sprache: Rust

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use alloc::vec;
use alloc::vec::Vec;
use core::hash::{Hash, Hasher};
use twox_hash::XxHash64;

// Constant seed handed to [`XxHash64::with_seed`]; `compute_hash` seeds every hasher it
// creates with this same value.
const SEED: u64 = 0xaabbccdd;

/// Break a 64-bit `hash` into the triple `(g, f0, f1)`.
///
/// * `g` is the top 16 bits of `hash` reduced modulo `m`; it acts as the first level hash.
/// * `f0` is the 24 bits directly below those (bits 24..48).
/// * `f1` is the lowest 24 bits (bits 0..24).
///
/// `(f0, f1)` are what later spread keys sharing the same `g` over distinct slots.
///
/// # Arguments
///
/// * `hash` - The hash to split.
/// * `m` - The modulus applied to the top 16 bits.
///
/// # Panics
///
/// Panics if `m` is 0, because the remainder by zero is undefined.
pub const fn split_hash64(hash: u64, m: usize) -> (usize, u32, u32) {
    // Selects the low 24 bits of a value.
    const LOW_24: u64 = 0x00ff_ffff;

    let top_16 = (hash >> 48) as usize;
    let f0 = ((hash >> 24) & LOW_24) as u32;
    let f1 = (hash & LOW_24) as u32;

    (top_16 % m, f0, f1)
}

/// Hash `key` with an [`XxHash64`] hasher seeded with `SEED` and return the 64-bit digest.
pub fn compute_hash<K: Hash + ?Sized>(key: &K) -> u64 {
    let mut state = XxHash64::with_seed(SEED);
    // Feed the key into the hasher, then read out the accumulated value.
    Hash::hash(key, &mut state);
    Hasher::finish(&state)
}

/// Derive a slot index from the split hash `(f0, f1)` and a displacement `(d0, d1)`, modulo `m`.
///
/// Returns [`None`] when the displacement is `(0, 0)` or when `m` is 0. Otherwise returns
/// `(f0 + f1 * d0 + d1) mod m`, where the sum and product wrap around on `u32` overflow
/// before the modulo is taken.
pub fn compute_index(f: (u32, u32), d: (u32, u32), m: u32) -> Option<usize> {
    let (f0, f1) = f;
    let (d0, d1) = d;

    // A zero displacement or a zero modulus yields no index.
    if m == 0 || (d0 == 0 && d1 == 0) {
        return None;
    }

    let mixed = f1.wrapping_mul(d0).wrapping_add(f0).wrapping_add(d1);
    Some((mixed % m) as usize)
}

/// Compute displacements for the given `key_hashes`, which split the keys into distinct slots by a
/// two-level hashing scheme.
///
/// Returns a tuple where the first item is the displacement array (indexed by the first level
/// hash `g`) and the second item is the reverse mapping (slot index to original key position)
/// used to permute keys and values into their slots. Both vectors have `key_hashes.len()`
/// entries.
///
/// 1. Split the hashes into (g, f0, f1).
/// 2. Bucket and sort the split hashes on g in descending order.
/// 3. In decreasing order of bucket size, try candidates until a (d0, d1) is found that splits
///    the keys in the bucket into distinct, still unoccupied slots.
/// 4. Mark the slots for the current bucket as occupied and store the reverse mapping.
/// 5. Repeat until all the keys have been assigned distinct slots.
///
/// If the candidate search for a bucket is exhausted without success, nothing is recorded for
/// that bucket: its displacement stays `(0, 0)` and no reverse-mapping entries are written for
/// its keys. No error is reported in that case. This includes a single-key input, where the only
/// candidate `(0, 0)` is skipped.
///
/// # Arguments
///
/// * `key_hashes` - [`ExactSizeIterator`] over the hashed key values
// Indexing and `unwrap` are used directly below; the inline comments state why each access is
// expected to be in bounds.
#[allow(clippy::indexing_slicing, clippy::unwrap_used)]
pub fn compute_displacements(
    key_hashes: impl ExactSizeIterator<Item = u64>,
) -> (Vec<(u32, u32)>, Vec<usize>) {
    // Number of keys; also used as the number of buckets, the number of slots and the modulus.
    let len = key_hashes.len();

    // A vector to track the size of buckets for sorting.
    let mut bucket_sizes = vec![0; len];

    // A flattened representation of items in the buckets after applying first level hash function.
    // Each entry is ((g, f0, f1), original position of the key in `key_hashes`).
    let mut bucket_flatten = Vec::with_capacity(len);

    // Split every hash, count how many keys land in each bucket and remember the original position.

    key_hashes.into_iter().enumerate().for_each(|(i, kh)| {
        // `split_hash64` is only reached when there is at least one key, so `len` is nonzero here.
        let h = split_hash64(kh, len);
        bucket_sizes[h.0] += 1;
        bucket_flatten.push((h, i))
    });

    // Sort by decreasing order of (bucket size, split hash). Entries with the same g share the
    // same bucket size and compare on g next, so each bucket ends up as one contiguous run.
    bucket_flatten.sort_by(|&(ha, _), &(hb, _)| {
        // ha.0, hb.0 are always within bounds of `bucket_sizes`
        (bucket_sizes[hb.0], hb).cmp(&(bucket_sizes[ha.0], ha))
    });

    // Generation count while iterating buckets.
    // Each trial of ((d0, d1), bucket chain) is a new generation.
    // We use this to track which slots have been claimed during the current trial.
    // NOTE(review): no explicit integer type is given for this counter and it is incremented once
    // per trial; presumably the number of trials stays within its range. Confirm.
    let mut generation = 0;

    // Whether a slot has been occupied by previous buckets with a different first level hash (different
    // bucket chain).
    let mut occupied = vec![false; len];

    // Track generation count for the slots.
    // A slot is free for the current trial if it is not occupied by a previous bucket chain and
    // its assignment is not equal to the current generation.
    let mut assignments = vec![0; len];

    // Slots chosen during the current trial, in the same order as the keys of the bucket
    // (saves us a recomputation of the index while assigning slots).
    let mut current_displacements = Vec::with_capacity(16);

    // (d0, d1) which splits the bucket into different slots, indexed by g.
    let mut displacements = vec![(0, 0); len];

    // Vec to store mapping to the original order of keys.
    // This is a permutation which will be applied to keys, values at the end.
    let mut reverse_mapping = vec![0; len];

    let mut start = 0;
    while start < len {
        // Bucket span with the same first level hash
        // start is always within bounds of `bucket_flatten`
        let g = bucket_flatten[start].0 .0;
        // g is always within bounds of `bucket_sizes`
        let end = start + bucket_sizes[g];
        // start, end - 1 are always within bounds of `bucket_flatten`
        let buckets = &bucket_flatten[start..end];

        // Try every candidate (d0, d1) except (0, 0) until one places all keys of this bucket.
        // NOTE(review): `len as u32` truncates when `len` does not fit in 32 bits; presumably
        // callers never pass that many keys. Confirm.
        'd0: for d0 in 0..len as u32 {
            'd1: for d1 in 0..len as u32 {
                // (0, 0) is rejected by `compute_index`, so it can never be a valid displacement.
                if (d0, d1) == (0, 0) {
                    continue;
                }
                // Start a fresh trial: forget the slots picked by the previous candidate.
                current_displacements.clear();
                generation += 1;

                for ((_, f0, f1), _) in buckets {
                    // `compute_index` only returns `None` for a (0, 0) displacement or a zero
                    // modulus; (0, 0) was skipped above.
                    let displacement_idx = compute_index((*f0, *f1), (d0, d1), len as u32).unwrap();

                    // displacement_idx is always within bounds
                    // Collision with an earlier bucket or with a key of this same trial:
                    // abandon this candidate and move on to the next d1.
                    if occupied[displacement_idx] || assignments[displacement_idx] == generation {
                        continue 'd1;
                    }
                    assignments[displacement_idx] = generation;
                    current_displacements.push(displacement_idx);
                }

                // Successfully found a (d0, d1), store it as index g.
                // g < displacements.len() due to modulo operation
                displacements[g] = (d0, d1);

                for (i, displacement_idx) in current_displacements.iter().enumerate() {
                    // `current_displacements` has same size as `buckets`
                    let (_, idx) = &buckets[i];

                    // displacement_idx is always within bounds
                    // Claim the slot permanently and record which original key it holds.
                    occupied[*displacement_idx] = true;
                    reverse_mapping[*displacement_idx] = *idx;
                }
                // This bucket is done; stop searching candidates.
                break 'd0;
            }
        }

        // Advance to the next bucket, whether or not a displacement was found for this one.
        start = end;
    }

    (displacements, reverse_mapping)
}

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.18 Sekunden (vorverarbeitet am 2026-06-27) ¤

Wurzel

Suchen

PVS Prover

Isabelle Prover

NIST Cobol Testsuite

Cephes Mathematical Library

Vienna Development Method

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.