/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef nsUTF8Utils_h_ #define nsUTF8Utils_h_
// NB: This code may be used from non-XPCOM code, in particular, the // standalone updater executable. That is, this file may be used in // two ways: if MOZILLA_INTERNAL_API is defined, this file will // provide signatures for the Mozilla abstract string types. It will // use XPCOM assertion/debugging macros, etc.
class UTF8traits { public: staticbool isASCII(char aChar) { return (aChar & 0x80) == 0x00; } staticbool isInSeq(char aChar) { return (aChar & 0xC0) == 0x80; } staticbool is2byte(char aChar) { return (aChar & 0xE0) == 0xC0; } staticbool is3byte(char aChar) { return (aChar & 0xF0) == 0xE0; } staticbool is4byte(char aChar) { return (aChar & 0xF8) == 0xF0; } staticbool is5byte(char aChar) { return (aChar & 0xFC) == 0xF8; } staticbool is6byte(char aChar) { return (aChar & 0xFE) == 0xFC; } // return the number of bytes in a sequence beginning with aChar staticint bytes(char aChar) { if (isASCII(aChar)) { return 1;
} if (is2byte(aChar)) { return 2;
} if (is3byte(aChar)) { return 3;
} if (is4byte(aChar)) { return 4;
}
MOZ_ASSERT_UNREACHABLE("should not be used for in-sequence characters"); return 1;
}
};
/** * Extract the next Unicode scalar value from the buffer and return it. The * pointer passed in is advanced to the start of the next character in the * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced * over the maximal valid prefix and *aErr is set to true (if aErr is not * null). * * Note: This method never sets *aErr to false to allow error accumulation * across multiple calls. * * Precondition: *aBuffer < aEnd
*/ class UTF8CharEnumerator { public: staticinline char32_t NextChar(constchar** aBuffer, constchar* aEnd, bool* aErr = nullptr) {
MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
MOZ_ASSERT(aEnd, "null end pointer");
constunsignedchar* p = reinterpret_cast<constunsignedchar*>(*aBuffer); constunsignedchar* end = reinterpret_cast<constunsignedchar*>(aEnd);
/** * Extract the next Unicode scalar value from the buffer and return it. The * pointer passed in is advanced to the start of the next character in the * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced over * the unpaired surrogate and *aErr is set to true (if aErr is not null). * * Note: This method never sets *aErr to false to allow error accumulation * across multiple calls. * * Precondition: *aBuffer < aEnd
*/ class UTF16CharEnumerator { public: staticinline char32_t NextChar(const char16_t** aBuffer, const char16_t* aEnd, bool* aErr = nullptr) {
MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
MOZ_ASSERT(aEnd, "null end pointer");
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.