/*
 * This implementation allocates one large object to do the sorting, which can be reused as many
 * times as desired. The amount of memory required is logarithmically proportional to the number
 * of keys to be sorted.
 */
/* Piles smaller than this are handled with a simple insertion sort. */ #define INSERTION_SORT_THRESHOLD 12
/* Sort keys are pointers to immutable fixed-length arrays of bytes. */ typedefconst u8 *sort_key_t;
/*
 * The keys are separated into piles based on the byte in each key at the current offset, so the
 * number of keys with each byte must be counted.
 */
struct histogram {
	/* The number of non-empty bins */
	u16 used;
	/* The index (key byte) of the first non-empty bin */
	u16 first;
	/* The index (key byte) of the last non-empty bin */
	u16 last;
	/* The number of occurrences of each specific byte */
	u32 size[256];
};
/*
 * Sub-tasks are manually managed on a stack, both for performance and to put a logarithmic bound
 * on the stack space needed.
 */
struct task {
	/* Pointer to the first key to sort. */
	sort_key_t *first_key;
	/* Pointer to the last key to sort. */
	sort_key_t *last_key;
	/* The offset into the key at which to continue sorting. */
	u16 offset;
	/* The number of bytes remaining in the sort keys. */
	u16 length;
};
/* Compare a segment of two fixed-length keys starting at an offset. */
static inline int compare(sort_key_t key1, sort_key_t key2, u16 offset, u16 length)
{
	return memcmp(&key1[offset], &key2[offset], length);
}
/* Insert the next unsorted key into an array of sorted keys. */
static inline void insert_key(const struct task task, sort_key_t *next)
{
	/* Pull the unsorted key out, freeing up the array slot. */
	sort_key_t unsorted = *next;

	/* Compare the key to the preceding sorted entries, shifting down ones that are larger. */
	while ((--next >= task.first_key) &&
	       (compare(unsorted, next[0], task.offset, task.length) < 0))
		next[1] = next[0];

	/* Insert the key into the last slot that was cleared, sorting it. */
	next[1] = unsorted;
}
/* * Sort a range of key segments using an insertion sort. This simple sort is faster than the * 256-way radix sort when the number of keys to sort is small.
*/ staticinlinevoid insertion_sort(conststruct task task)
{
sort_key_t *next;
for (next = task.first_key + 1; next <= task.last_key; next++)
insert_key(task, next);
}
/* * Count the number of times each byte value appears in the arrays of keys to sort at the current * offset, keeping track of the number of non-empty bins, and the index of the first and last * non-empty bin.
*/ staticinlinevoid measure_bins(conststruct task task, struct histogram *bins)
{
sort_key_t *key_ptr;
/* * Subtle invariant: bins->used and bins->size[] are zero because the sorting code clears * it all out as it goes. Even though this structure is re-used, we don't need to pay to * zero it before starting a new tally.
*/
bins->first = U8_MAX;
bins->last = 0;
for (key_ptr = task.first_key; key_ptr <= task.last_key; key_ptr++) { /* Increment the count for the byte in the key at the current offset. */
u8 bin = (*key_ptr)[task.offset];
u32 size = ++bins->size[bin];
/* Track non-empty bins. */ if (size == 1) {
bins->used += 1; if (bin < bins->first)
bins->first = bin;
if (bin > bins->last)
bins->last = bin;
}
}
}
/*
 * Convert the bin sizes to pointers to where each pile goes.
 *
 *   pile[0] = first_key + bin->size[0],
 *   pile[1] = pile[0] + bin->size[1], etc.
 *
 * After the keys are moved to the appropriate pile, we'll need to sort each of the piles by the
 * next radix position. A new task is put on the stack for each pile containing lots of keys, or a
 * new task is put on the list for each pile containing few keys.
 *
 * @stack: pointer the top of the stack
 * @end_of_stack: the end of the stack
 * @list: pointer the head of the list
 * @pile: array for pointers to the end of each pile
 * @bins: the histogram of the sizes of each pile
 * @first_key: the first key of the stack
 * @offset: the next radix position to sort by
 * @length: the number of bytes remaining in the sort keys
 *
 * Return: UDS_SUCCESS or an error code
 */
static inline int push_bins(struct task **stack, struct task *end_of_stack,
			    struct task **list, sort_key_t *pile[],
			    struct histogram *bins, sort_key_t *first_key,
			    u16 offset, u16 length)
{
	sort_key_t *pile_start = first_key;
	int bin;

	/*
	 * NOTE(review): the loop body below was truncated in the fragment under review and has
	 * been reconstructed from the documented contract above — verify against the canonical
	 * implementation before relying on it.
	 */
	for (bin = bins->first; ; bin++) {
		u32 size = bins->size[bin];

		/* Skip empty piles. */
		if (size == 0)
			continue;

		/* There's no need to sort empty keys. */
		if (length > 0) {
			struct task **target = NULL;

			if (size > INSERTION_SORT_THRESHOLD) {
				/* Large piles get a stack entry for further radix passes. */
				if (*stack >= end_of_stack)
					return UDS_BAD_STATE;
				target = stack;
			} else if (size > 1) {
				/* Small piles get a list entry for a later insertion sort. */
				target = list;
			}

			if (target != NULL) {
				/* Append a task describing this pile at the next offset. */
				struct task *task = (*target)++;

				task->first_key = pile_start;
				task->last_key = &pile_start[size - 1];
				task->offset = offset;
				task->length = length;
			}
		}

		/* Advance past this pile and record where it ends. */
		pile_start += size;
		pile[bin] = pile_start;
		if (--bins->used == 0)
			break;
	}

	return UDS_SUCCESS;
}
/* * Sort pointers to fixed-length keys (arrays of bytes) using a radix sort. The sort implementation * is unstable, so the relative ordering of equal keys is not preserved.
*/ int uds_radix_sort(struct radix_sorter *sorter, constunsignedchar *keys[], unsignedint count, unsignedshort length)
{ struct task start; struct histogram *bins = &sorter->bins;
sort_key_t **pile = sorter->pile; struct task *task_stack = sorter->stack;
/* All zero-length keys are identical and therefore already sorted. */ if ((count == 0) || (length == 0)) return UDS_SUCCESS;
/* The initial task is to sort the entire length of all the keys. */
start = (struct task) {
.first_key = keys,
.last_key = &keys[count - 1],
.offset = 0,
.length = length,
};
if (count <= INSERTION_SORT_THRESHOLD) {
insertion_sort(start); return UDS_SUCCESS;
}
if (count > sorter->count) return UDS_INVALID_ARGUMENT;
/* * Repeatedly consume a sorting task from the stack and process it, pushing new sub-tasks * onto the stack for each radix-sorted pile. When all tasks and sub-tasks have been * processed, the stack will be empty and all the keys in the starting task will be fully * sorted.
*/ for (*task_stack = start; task_stack >= sorter->stack; task_stack--) { conststruct task task = *task_stack; struct task *insertion_task_list; int result;
sort_key_t *fence;
sort_key_t *end;
measure_bins(task, bins);
/* * Now that we know how large each bin is, generate pointers for each of the piles * and push a new task to sort each pile by the next radix byte.
*/
insertion_task_list = sorter->insertion_list;
result = push_bins(&task_stack, sorter->end_of_stack,
&insertion_task_list, pile, bins, task.first_key,
task.offset + 1, task.length - 1); if (result != UDS_SUCCESS) {
memset(bins, 0, sizeof(*bins)); return result;
}
/* Now bins->used is zero again. */
/* * Don't bother processing the last pile: when piles 0..N-1 are all in place, then * pile N must also be in place.
*/
end = task.last_key - bins->size[bins->last];
bins->size[bins->last] = 0;
/* * The radix byte of the key tells us which pile it belongs in. Swap it for * an unprocessed item just below that pile, and repeat.
*/ while (--pile[bin = key[task.offset]] > fence)
swap_keys(pile[bin], &key);
/* * The pile reached the fence. Put the key at the bottom of that pile, * completing it, and advance the fence to the next pile.
*/
*fence = key;
fence += bins->size[bin];
bins->size[bin] = 0;
}
/* Now bins->size[] is all zero again. */
/* * When the number of keys in a task gets small enough, it is faster to use an * insertion sort than to keep subdividing into tiny piles.
*/ while (--insertion_task_list >= sorter->insertion_list)
insertion_sort(*insertion_task_list);
}
/*
 * NOTE(review): the text below is a web-page disclaimer accidentally captured with the source
 * (extraction artifact, not part of the program). Translated from German and commented out so it
 * does not break compilation:
 * "The information on this web page was carefully compiled to the best of our knowledge.
 * However, neither completeness, nor correctness, nor quality of the provided information is
 * guaranteed. Remark: the colored syntax display and the measurement are still experimental."
 */