/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/ArrayUtils.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include <cstring>
#include "js/CharacterEncoding.h"
#include "js/CompilationAndEvaluation.h" // JS::Compile
#include "js/Exception.h"
#include "js/friend/ErrorMessages.h" // JSMSG_*
#include "js/SourceText.h"
#include "jsapi-tests/tests.h"
#include "util/Text.h"
#include "vm/ErrorReporting.h"
using mozilla::ArrayEqual;
using mozilla::IsAsciiHexDigit;
using mozilla::Utf8Unit;
static bool contains(
const char* str,
const char* substr) {
return std::strstr(str, substr) != nullptr;
}
BEGIN_TEST(testUtf8BadBytes) {
static const char badLeadingUnit[] =
"var x = \x80";
CHECK(testBadUtf8(
badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(startsWith(chars,
"0x80"));
CHECK(isBadLeadUnitMessage(chars));
return true;
},
"0x80"));
static const char badSecondInTwoByte[] =
"var x = \xDF\x20";
CHECK(testBadUtf8(
badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isBadTrailingBytesMessage(chars));
CHECK(contains(chars,
"0x20"));
return true;
},
"0xDF 0x20"));
static const char badSecondInThreeByte[] =
"var x = \xEF\x17\xA7";
CHECK(testBadUtf8(
badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isBadTrailingBytesMessage(chars));
CHECK(contains(chars,
"0x17"));
return true;
},
// Validating stops with the first invalid code unit and
// shouldn't go beyond that.
"0xEF 0x17"));
static const char lengthTwoTooShort[] =
"var x = \xDF";
CHECK(testBadUtf8(
lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isNotEnoughUnitsMessage(chars));
CHECK(contains(chars,
"0xDF"));
CHECK(contains(chars,
" 1 byte, but 0 bytes were present"));
return true;
},
"0xDF"));
static const char forbiddenHighSurrogate[] =
"var x = \xED\xA2\x87";
CHECK(testBadUtf8(
forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isSurrogateMessage(chars));
CHECK(contains(chars,
"0xD887"));
return true;
},
"0xED 0xA2 0x87"));
static const char forbiddenLowSurrogate[] =
"var x = \xED\xB7\xAF";
CHECK(testBadUtf8(
forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isSurrogateMessage(chars));
CHECK(contains(chars,
"0xDDEF"));
return true;
},
"0xED 0xB7 0xAF"));
static const char oneTooBig[] =
"var x = \xF4\x90\x80\x80";
CHECK(testBadUtf8(
oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isTooBigMessage(chars));
CHECK(contains(chars,
"0x110000"));
return true;
},
"0xF4 0x90 0x80 0x80"));
static const char notShortestFormZero[] =
"var x = \xC0\x80";
CHECK(testBadUtf8(
notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isNotShortestFormMessage(chars));
CHECK(startsWith(chars,
"0x0 isn't "));
return true;
},
"0xC0 0x80"));
static const char notShortestFormNonzero[] =
"var x = \xE0\x87\x80";
CHECK(testBadUtf8(
notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
[
this](JS::ConstUTF8CharsZ message) {
const char* chars = message.c_str();
CHECK(isNotShortestFormMessage(chars));
CHECK(startsWith(chars,
"0x1C0 isn't "));
return true;
},
"0xE0 0x87 0x80"));
return true;
}
static constexpr size_t LengthOfByte = js_strlen(
"0xFF");
static bool startsWithByte(
const char* str) {
return str[0] ==
'0' && str[1] ==
'x' && IsAsciiHexDigit(str[2]) &&
IsAsciiHexDigit(str[3]);
}
static bool startsWith(
const char* str,
const char* prefix) {
return std::strncmp(prefix, str, strlen(prefix)) == 0;
}
static bool equals(
const char* str,
const char* expected) {
return std::strcmp(str, expected) == 0;
}
static bool isBadLeadUnitMessage(
const char* str) {
return startsWithByte(str) &&
equals(str + LengthOfByte,
" byte doesn't begin a valid UTF-8 code point");
}
static bool isBadTrailingBytesMessage(
const char* str) {
return startsWith(str,
"bad trailing UTF-8 byte ");
}
static bool isNotEnoughUnitsMessage(
const char* str) {
return startsWithByte(str) &&
startsWith(str + LengthOfByte,
" byte in UTF-8 must be followed by ");
}
static bool isForbiddenCodePointMessage(
const char* str) {
return contains(str,
"isn't a valid code point because");
}
static bool isSurrogateMessage(
const char* str) {
return isForbiddenCodePointMessage(str) &&
contains(str,
" it's a UTF-16 surrogate");
}
static bool isTooBigMessage(
const char* str) {
return isForbiddenCodePointMessage(str) &&
contains(str,
"the maximum code point is U+10FFFF");
}
static bool isNotShortestFormMessage(
const char* str) {
return isForbiddenCodePointMessage(str) &&
contains(str,
"it wasn't encoded in shortest possible form");
}
template <size_t N,
typename TestMessage>
bool testBadUtf8(
const char (&chars)[N],
unsigned errorNumber,
TestMessage testMessage,
const char* badBytes) {
JS::Rooted<JSScript*> script(cx);
{
JS::CompileOptions options(cx);
JS::SourceText<mozilla::Utf8Unit> srcBuf;
CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
script = JS::Compile(cx, options, srcBuf);
CHECK(!script);
}
JS::ExceptionStack exnStack(cx);
CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
JS::ErrorReportBuilder report(cx);
CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
const auto* errorReport = report.report();
CHECK(errorReport->errorNumber == errorNumber);
CHECK(testMessage(errorReport->message()));
{
const auto& notes = errorReport->notes;
CHECK(notes != nullptr);
auto iter = notes->begin();
CHECK(iter != notes->end());
const char* noteMessage = (*iter)->message().c_str();
// The prefix ought always be the same.
static constexpr
char expectedPrefix[] =
"the code units comprising this invalid code point were: ";
constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix);
CHECK(startsWith(noteMessage, expectedPrefix));
// The end of the prefix is the bad bytes.
CHECK(equals(noteMessage + expectedPrefixLen, badBytes));
++iter;
CHECK(iter == notes->end());
}
static constexpr char16_t expectedContext[] = u
"var x = ";
constexpr size_t expectedContextLen = js_strlen(expectedContext);
const char16_t* lineOfContext = errorReport->linebuf();
size_t lineOfContextLength = errorReport->linebufLength();
CHECK(lineOfContext[lineOfContextLength] ==
'\0');
CHECK(lineOfContextLength == expectedContextLen);
CHECK(std::memcmp(lineOfContext, expectedContext,
expectedContextLen *
sizeof(char16_t)) == 0);
return true;
}
END_TEST(testUtf8BadBytes)
BEGIN_TEST(testMultiUnitUtf8InWindow) {
static const char firstInWindowIsMultiUnit[] =
"\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF";
CHECK(testContext(firstInWindowIsMultiUnit,
u
"ππ = 6.283185307; @ bad starts HERE:"));
static const char atTokenOffsetIsMulti[] =
"var z = 💯";
CHECK(testContext(atTokenOffsetIsMulti, u
"var z = 💯"));
static const char afterTokenOffsetIsMulti[] =
"var z = @💯💯💯X";
CHECK(testContext(afterTokenOffsetIsMulti, u
"var z = @💯💯💯X"));
static const char atEndIsMulti[] =
"var z = @@💯💯💯";
CHECK(testContext(atEndIsMulti, u
"var z = @@💯💯💯"));
return true;
}
template <size_t N, size_t ContextLenWithNull>
bool testContext(
const char (&chars)[N],
const char16_t (&expectedContext)[ContextLenWithNull]) {
JS::Rooted<JSScript*> script(cx);
{
JS::CompileOptions options(cx);
JS::SourceText<mozilla::Utf8Unit> srcBuf;
CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
script = JS::Compile(cx, options, srcBuf);
CHECK(!script);
}
JS::ExceptionStack exnStack(cx);
CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
JS::ErrorReportBuilder report(cx);
CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
const auto* errorReport = report.report();
CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER);
const char16_t* lineOfContext = errorReport->linebuf();
size_t lineOfContextLength = errorReport->linebufLength();
CHECK(lineOfContext[lineOfContextLength] ==
'\0');
CHECK(lineOfContextLength == ContextLenWithNull - 1);
CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull));
return true;
}
END_TEST(testMultiUnitUtf8InWindow)
BEGIN_TEST(testCompileJsonModule) {
static const char chars[] =
"{ \"a\
": 1, \"b\
": 2, \"c\
": \"foo\
" }";
JS::Rooted<JSObject*> module(cx);
{
JS::CompileOptions options(cx);
JS::SourceText<mozilla::Utf8Unit> srcBuf;
CHECK(srcBuf.init(cx, chars, js_strlen(chars),
JS::SourceOwnership::Borrowed));
module = JS::CompileJsonModule(cx, options, srcBuf);
CHECK(module);
}
return true;
}
END_TEST(testCompileJsonModule)
BEGIN_TEST(testCompileJsonModuleInvalidJson) {
static const char chars[] =
"{ \"a\
": 1, \"b\
": 2, \"c\
":";
JS::Rooted<JSObject*> module(cx);
{
JS::CompileOptions options(cx);
JS::SourceText<mozilla::Utf8Unit> srcBuf;
CHECK(srcBuf.init(cx, chars, js_strlen(chars),
JS::SourceOwnership::Borrowed));
module = JS::CompileJsonModule(cx, options, srcBuf);
CHECK(!module);
}
JS::ExceptionStack exnStack(cx);
CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
JS::ErrorReportBuilder report(cx);
CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
const auto* errorReport = report.report();
CHECK(errorReport->errorNumber == JSMSG_JSON_BAD_PARSE);
CHECK(contains(errorReport->message().c_str(),
"JSON.parse: unexpected end of data"));
return true;
}
END_TEST(testCompileJsonModuleInvalidJson)
BEGIN_TEST(testCompileJsonModuleBadUtf8) {
static const char chars[] =
"{ \"a\
": 1, \"b\
": 2, \"c\
": \"\xDF\x20\
" }";
JS::Rooted<JSObject*> module(cx);
{
JS::CompileOptions options(cx);
JS::SourceText<mozilla::Utf8Unit> srcBuf;
CHECK(srcBuf.init(cx, chars, js_strlen(chars),
JS::SourceOwnership::Borrowed));
module = JS::CompileJsonModule(cx, options, srcBuf);
CHECK(!module);
}
JS::ExceptionStack exnStack(cx);
CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
JS::ErrorReportBuilder report(cx);
CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
const auto* errorReport = report.report();
CHECK(errorReport->errorNumber == JSMSG_MALFORMED_UTF8_CHAR);
CHECK(contains(errorReport->message().c_str(),
"malformed UTF-8 character sequence at offset"));
return true;
}
END_TEST(testCompileJsonModuleBadUtf8)