/**
 * ucs_is_zero_width() - Check whether a Unicode code point is zero-width.
 * @cp: Unicode code point (UCS-4)
 *
 * Code points for which UCS_IS_BMP() holds are checked against
 * ucs_zero_width_bmp_ranges with cp_in_range16(); all others are checked
 * against ucs_zero_width_non_bmp_ranges with cp_in_range32().
 *
 * Return: true if the character is zero-width, false otherwise
 */
bool ucs_is_zero_width(u32 cp)
{
	/* Handle the non-BMP case first so the BMP lookup is the fall-through. */
	if (!UCS_IS_BMP(cp))
		return cp_in_range32(cp, ucs_zero_width_non_bmp_ranges,
				     ARRAY_SIZE(ucs_zero_width_non_bmp_ranges));

	return cp_in_range16(cp, ucs_zero_width_bmp_ranges,
			     ARRAY_SIZE(ucs_zero_width_bmp_ranges));
}
/**
 * ucs_is_double_width() - Check whether a Unicode code point is double-width.
 * @cp: Unicode code point (UCS-4)
 *
 * Code points for which UCS_IS_BMP() holds are checked against
 * ucs_double_width_bmp_ranges with cp_in_range16(); all others are checked
 * against ucs_double_width_non_bmp_ranges with cp_in_range32().
 *
 * Return: true if the character is double-width, false otherwise
 */
bool ucs_is_double_width(u32 cp)
{
	/* Handle the non-BMP case first so the BMP lookup is the fall-through. */
	if (!UCS_IS_BMP(cp))
		return cp_in_range32(cp, ucs_double_width_non_bmp_ranges,
				     ARRAY_SIZE(ucs_double_width_non_bmp_ranges));

	return cp_in_range16(cp, ucs_double_width_bmp_ranges,
			     ARRAY_SIZE(ucs_double_width_bmp_ranges));
}
/*
 * Table entry pairing a base character and a combining mark with the
 * single character the two recompose to.
 * Using u16 to save space since all values are within BMP range.
 */
struct ucs_recomposition {
	u16 base;	/* base character */
	u16 mark;	/* combining mark */
	u16 recomposed;	/* corresponding recomposed character */
};
/* Compare base character first */ if (search_key->base < entry->base) return -1; if (search_key->base > entry->base) return 1;
/* Base characters match, now compare combining character */ if (search_key->mark < entry->mark) return -1; if (search_key->mark > entry->mark) return 1;
/* Both match */ return 0;
}
/** * ucs_recompose() - Attempt to recompose two Unicode characters into a single character. * @base: Base Unicode code point (UCS-4) * @mark: Combining mark Unicode code point (UCS-4) * * Return: Recomposed Unicode code point, or 0 if no recomposition is possible
*/
u32 ucs_recompose(u32 base, u32 mark)
{ /* Check if characters are within the range of our table */ if (base < UCS_RECOMPOSE_MIN_BASE || base > UCS_RECOMPOSE_MAX_BASE ||
mark < UCS_RECOMPOSE_MIN_MARK || mark > UCS_RECOMPOSE_MAX_MARK) return 0;
/*
 * The fallback table structures implement a 2-level lookup.
 *
 * struct ucs_page_desc is the first level: it describes one page, where
 * the page index is the high byte of the code point, and gives the
 * position (start) and length (count) of that page's entries in the
 * entries array.
 */
struct ucs_page_desc {
	u8 page;	/* Page index (high byte of code points) */
	u8 count;	/* Number of entries in this page */
	u16 start;	/* Start index in entries array */
};
/*
 * Fallback table entry, keyed by the offset within its page.
 *
 * When fallback holds UCS_PAGE_ENTRY_RANGE_MARKER the entry opens a range:
 * the comparison code in this file then takes the upper bound of the range
 * from the offset of the entry that follows (entry[1].offset).
 */
struct ucs_page_entry {
	u8 offset;	/* Offset within page (0-255) */
	u8 fallback;	/* Fallback character or range start marker */
};
if (offset < entry->offset) return -1; if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) { if (offset > entry[1].offset) return 1;
} else { if (offset > entry->offset) return 1;
} return 0;
}
/** * ucs_get_fallback() - Get a substitution for the provided Unicode character * @cp: Unicode code point (UCS-4) * * Get a simpler fallback character for the provided Unicode character. * This is used for terminal display when corresponding glyph is unavailable. * The substitution may not be as good as the actual glyph for the original * character but still way more helpful than a squared question mark. * * Return: Fallback Unicode code point, or 0 if none is available
*/
u32 ucs_get_fallback(u32 cp)
{ conststruct ucs_page_desc *page; conststruct ucs_page_entry *entry;
u8 page_idx = cp >> 8, offset = cp;
if (!UCS_IS_BMP(cp)) return 0;
/* * Full-width to ASCII mapping (covering all printable ASCII 33-126) * 0xFF01 (!) to 0xFF5E (~) -> ASCII 33 (!) to 126 (~) * We process them programmatically to reduce the table size.
*/ if (cp >= 0xFF01 && cp <= 0xFF5E) return cp - 0xFF01 + 33;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.