/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
// timer while the profiler is active. It involves writing a stack trace plus
// a variety of other values (memory measurements, responsiveness
// measurements, markers, etc.) into the main ProfileBuffer. The sampling is
// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
// get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
// call (profiler_get_backtrace()). It involves writing a stack trace and
// little else into a temporary ProfileBuffer, and wrapping that up in a
// ProfilerBacktrace that can be subsequently used in a marker. The sampling
// is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the
// register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
// API call (profiler_suspend_and_sample_thread()). It involves getting a
// stack trace via a ProfilerStackCollector; it does not write to a
// ProfileBuffer. The sampling is done from off-thread, and so uses
// SuspendAndSampleAndResumeThread() to get the register values.
#include "platform.h"
#include "GeckoProfiler.h"
#include "GeckoProfilerReporter.h"
#include "PageInformation.h"
#include "PowerCounters.h"
#include "ProfileBuffer.h"
#include "ProfiledThreadData.h"
#include "ProfilerBacktrace.h"
#include "ProfilerChild.h"
#include "ProfilerCodeAddressService.h"
#include "ProfilerControl.h"
#include "ProfilerCPUFreq.h"
#include "ProfilerIOInterposeObserver.h"
#include "ProfilerParent.h"
#include "ProfilerNativeStack.h"
#include "ProfilerStackWalk.h"
#include "ProfilerRustBindings.h"
#include "mozilla/Assertions.h"
#include "mozilla/Likely.h"
#include "mozilla/Maybe.h"
#include "mozilla/MozPromise.h"
#include "mozilla/Perfetto.h"
#include "nsCOMPtr.h"
#include "nsDebug.h"
#include "nsISupports.h"
#include "nsXPCOM.h"
#include "SharedLibraries.h"
#include "VTuneProfiler.h"
#include "ETWTools.h"
#include "js/ProfilingFrameIterator.h"
#include "memory_counter.h"
#include "memory_hooks.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/BaseAndGeckoProfilerDetail.h"
#include "mozilla/CycleCollectedJSContext.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/extensions/WebExtensionPolicy.h"
#include "mozilla/glean/ProcesstoolsMetrics.h"
#include "mozilla/Monitor.h"
#include "mozilla/Preferences.h"
#include "mozilla/Printf.h"
#include "mozilla/ProcInfo.h"
#include "mozilla/ProfilerBufferSize.h"
#include "mozilla/ProfileBufferChunkManagerSingle.h"
#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
#include "mozilla/ProfileChunkedBuffer.h"
#include "mozilla/ProfilerBandwidthCounter.h"
#include "mozilla/SchedulerGroup.h"
#include "mozilla/Services.h"
#include "mozilla/StackWalk.h"
#include "mozilla/Try.h"
#ifdef XP_WIN
# include
"mozilla/NativeNt.h"
# include
"mozilla/StackWalkThread.h"
# include
"mozilla/WindowsStackWalkInitialization.h"
#endif
#include "mozilla/StaticPtr.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "BaseProfiler.h"
#include "nsDirectoryServiceDefs.h"
#include "nsDirectoryServiceUtils.h"
#include "nsIDocShell.h"
#include "nsIHttpProtocolHandler.h"
#include "nsIObserverService.h"
#include "nsIPropertyBag2.h"
#include "nsIXULAppInfo.h"
#include "nsIXULRuntime.h"
#include "nsJSPrincipals.h"
#include "nsMemoryReporterManager.h"
#include "nsPIDOMWindow.h"
#include "nsProfilerStartParams.h"
#include "nsScriptSecurityManager.h"
#include "nsSystemInfo.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "nsDirectoryServiceUtils.h"
#include "Tracing.h"
#include "prdtoa.h"
#include "prtime.h"
#include <algorithm>
#include <errno.h>
#include <fstream>
#include <ostream>
#include <set>
#include <sstream>
#include <string_view>
#include <type_traits>
// To simplify other code in this file, define a helper definition to avoid
// repeating the same preprocessor checks.
// The signals that we use to control the profiler conflict with the signals
// used to control the code coverage tool. Therefore, if coverage is enabled,
// we need to disable our own signal handling mechanisms.
#ifndef MOZ_CODE_COVERAGE
# ifdef XP_WIN
// TODO: Add support for windows "signal"-like behaviour. See Bug 1867328.
# elif
defined(GP_OS_darwin) ||
defined(GP_OS_linux) || \
defined(GP_OS_android) ||
defined(GP_OS_freebsd)
// Specify the specific platforms that we want to support
# define GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL 1
# else
// No support on this unknown platform!
# endif
#endif
// We need some extra includes if we're supporting async posix signals
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
# include <signal.h>
# include <fcntl.h>
# include <unistd.h>
# include <errno.h>
# include <pthread.h>
#endif
#if defined(GP_OS_android)
# include
"JavaExceptions.h"
# include
"mozilla/java/GeckoJavaSamplerNatives.h"
# include
"mozilla/jni/Refs.h"
#endif
#if defined(XP_MACOSX)
# include
"nsCocoaFeatures.h"
#endif
#if defined(GP_PLAT_amd64_darwin)
# include <cpuid.h>
#endif
#if defined(GP_OS_windows)
# include <processthreadsapi.h>
// GetThreadInformation is not available on Windows 7.
WINBASEAPI
BOOL WINAPI GetThreadInformation(
_In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass,
_Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation,
_In_ DWORD ThreadInformationSize);
#endif
// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// Mac builds use FramePointerStackWalk(). Even if we build without
// frame pointers, we'll still get useful stacks in system libraries
// because those always have frame pointers.
// We don't use MozStackWalk() on Mac.
#if defined(GP_OS_darwin)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) ||
defined(GP_PLAT_arm_android)
# define HAVE_NATIVE_UNWIND
# define USE_EHABI_STACKWALK
# include
"EHABIStackWalk.h"
#endif
// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) ||
defined(GP_PLAT_x86_linux) || \
defined(GP_PLAT_amd64_android) ||
defined(GP_PLAT_x86_android) || \
defined(GP_PLAT_mips64_linux) ||
defined(GP_PLAT_arm64_linux) || \
defined(GP_PLAT_arm64_android) ||
defined(GP_PLAT_amd64_freebsd) || \
defined(GP_PLAT_arm64_freebsd)
# define HAVE_NATIVE_UNWIND
# define USE_LUL_STACKWALK
# include
"lul/LulMain.h"
# include
"lul/platform-linux-lul.h"
// On linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.).
//
// FramePointerStackWalk can produce incomplete stacks when the current entry is
// in a shared library without framepointers, however LUL can take a long time
// to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
# if defined(MOZ_PROFILING)
# define USE_FRAME_POINTER_STACK_WALK
# endif
#endif
// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) ||
defined(USE_MOZ_STACK_WALK)
# define HAVE_FASTINIT_NATIVE_UNWIND
#endif
#ifdef MOZ_VALGRIND
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((
void)0)
#endif
#if defined(GP_OS_linux) ||
defined(GP_OS_android) ||
defined(GP_OS_freebsd)
# include <ucontext.h>
#endif
using namespace mozilla;
using namespace mozilla::literals::ProportionValue_literals;
using mozilla::profiler::detail::RacyFeatures;
using ThreadRegistration = mozilla::profiler::ThreadRegistration;
using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo;
using ThreadRegistry = mozilla::profiler::ThreadRegistry;
LazyLogModule gProfilerLog(
"prof");
ProfileChunkedBuffer& profiler_get_core_buffer() {
  // The Base Profiler (in mozglue) owns and lazily creates the core buffer;
  // cache a reference to it here so later calls from xul are quick.
  static ProfileChunkedBuffer& sCoreBuffer =
      baseprofiler::profiler_get_core_buffer();
  return sCoreBuffer;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Control character to start the profiler ('g' for "go"!)
static const char sAsyncSignalControlCharStart =
'g';
// Control character to stop the profiler ('s' for "stop"!)
static const char sAsyncSignalControlCharStop =
's';
// This is a file descriptor that is the "write" end of the POSIX pipe that we
// use to start the profiler. It is written to in profiler_start_signal_handler
// and read from in AsyncSignalControlThread
static mozilla::Atomic<
int, mozilla::MemoryOrdering::Relaxed>
sAsyncSignalControlWriteFd(-1);
// Atomic flag to stop the profiler from within the sampling loop
mozilla::Atomic<
bool, mozilla::MemoryOrdering::Relaxed> gStopAndDumpFromSignal(
false);
#endif
// Forward declare the function to call when we need to dump + stop from within
// the async control thread
void profiler_dump_and_stop();
// Forward declare the function to call when we need to start the profiler.
void profiler_start_from_signal();
mozilla::Atomic<
int, mozilla::MemoryOrdering::Relaxed> gSkipSampling;
#if defined(GP_OS_android)
// JNI-facing entry points that let the Java-side GeckoJavaSampler control the
// Gecko profiler (query time, start, and stop/capture).
class GeckoJavaSampler
    : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
 private:
  GeckoJavaSampler();

 public:
  // Returns the current profiler time in milliseconds, or 0.0 when the
  // profiler is not active.
  static double GetProfilerTime() {
    if (!profiler_is_active()) {
      return 0.0;
    }
    return profiler_time();
  }

  // Copies the UTF-8 contents of a Java string array into aCharArray.
  static void JavaStringArrayToCharArray(jni::ObjectArray::Param& aJavaArray,
                                         Vector<const char*>& aCharArray,
                                         JNIEnv* aJni) {
    int arraySize = aJavaArray->Length();
    for (int i = 0; i < arraySize; i++) {
      jstring javaString =
          (jstring)(aJni->GetObjectArrayElement(aJavaArray.Get(), i));
      const char* filterString = aJni->GetStringUTFChars(javaString, 0);
      // FIXME. These strings are leaked.
      MOZ_RELEASE_ASSERT(aCharArray.append(filterString));
    }
  }

  // Starts the profiler with thread filters and feature names provided as
  // Java string arrays.
  static void StartProfiler(jni::ObjectArray::Param aFiltersArray,
                            jni::ObjectArray::Param aFeaturesArray) {
    JNIEnv* jni = jni::GetEnvForThread();
    Vector<const char*> filtersTemp;
    Vector<const char*> featureStringArray;
    JavaStringArrayToCharArray(aFiltersArray, filtersTemp, jni);
    JavaStringArrayToCharArray(aFeaturesArray, featureStringArray, jni);
    uint32_t features = 0;
    features = ParseFeaturesFromStringArray(featureStringArray.begin(),
                                            featureStringArray.length());

    // 128 * 1024 * 1024 is the entries preset that is given in
    // devtools/client/performance-new/shared/background.sys.mjs
    profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features,
                   filtersTemp.begin(), filtersTemp.length(), 0, Nothing());
  }

  // Pauses the profiler, resolves aGeckoResult with a gzipped profile (or an
  // exception on failure), then stops the profiler either way.
  static void StopProfiler(jni::Object::Param aGeckoResult) {
    auto result = java::GeckoResult::LocalRef(aGeckoResult);
    profiler_pause();
    nsCOMPtr<nsIProfiler> nsProfiler(
        do_GetService("@mozilla.org/tools/profiler;1"));
    nsProfiler->GetProfileDataAsGzippedArrayBufferAndroid(0)->Then(
        GetMainThreadSerialEventTarget(), __func__,
        [result](FallibleTArray<uint8_t> compressedProfile) {
          result->Complete(jni::ByteArray::New(
              reinterpret_cast<const int8_t*>(compressedProfile.Elements()),
              compressedProfile.Length()));
          // Done with capturing a profile. Stop the profiler.
          profiler_stop();
        },
        [result](nsresult aRv) {
          // "%08x" produces exactly 8 hex digits + NUL; use snprintf so the
          // buffer bound is enforced instead of relying on sprintf.
          char errorString[9];
          snprintf(errorString, sizeof(errorString), "%08x", uint32_t(aRv));
          result->CompleteExceptionally(
              mozilla::java::sdk::IllegalStateException::New(errorString)
                  .Cast<jni::Throwable>());
          // Failed to capture a profile. Stop the profiler.
          profiler_stop();
        });
  }
};
#endif
// Compile-time check that profiler feature numbers are contiguous: they must
// start at 0 and each subsequent feature must be exactly one greater.
static constexpr bool ValidateFeatures() {
  int nextExpected = 0;
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
  if ((n_) != nextExpected) {                 \
    return false;                             \
  }                                           \
  ++nextExpected;

  PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)

#undef CHECK_FEATURE

  return true;
}
static_assert(ValidateFeatures(),
"Feature list is invalid");
// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
uint32_t features = 0;
#define ADD_FEATURE(n_, str_, Name_, desc_) \
ProfilerFeature::Set
##Name_(features);
// Add all the possible features.
PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration.
#if !
defined(GP_OS_android)
ProfilerFeature::ClearJava(features);
#endif
#if !
defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features);
#endif
#if defined(MOZ_REPLACE_MALLOC) &&
defined(MOZ_PROFILER_MEMORY)
if (getenv(
"XPCOM_MEM_BLOAT_LOG")) {
DEBUG_LOG(
"XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
// The memory hooks are available, but the bloat log is enabled, which is
// not compatible with the native allocations tracking. See the comment in
// enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
// more information.
ProfilerFeature::ClearNativeAllocations(features);
}
#else
// The memory hooks are not available.
ProfilerFeature::ClearNativeAllocations(features);
#endif
#if !
defined(MOZ_MEMORY)
or !
defined(MOZ_PROFILER_MEMORY)
ProfilerFeature::ClearMemory(features);
#endif
#if !
defined(GP_OS_windows)
ProfilerFeature::ClearNoTimerResolutionChange(features);
#endif
#if !
defined(HAVE_CPU_FREQ_SUPPORT)
ProfilerFeature::ClearCPUFrequency(features);
#endif
return features;
}
// The default feature set shared by every context, whether or not each
// feature is actually available on this platform (bitwise OR is commutative,
// so the grouping below is purely cosmetic).
static constexpr uint32_t DefaultFeatures() {
  return ProfilerFeature::CPUUtilization | ProfilerFeature::ProcessCPU |
         ProfilerFeature::Java | ProfilerFeature::JS |
         ProfilerFeature::Screenshots | ProfilerFeature::StackWalk;
}
// Additional default features applied when MOZ_PROFILER_STARTUP is set (even
// if not available). Startup is heavy on I/O, so file I/O profiling is
// enabled by default for startup profiles.
static constexpr uint32_t StartupExtraDefaultFeatures() {
  return ProfilerFeature::IPCMessages | ProfilerFeature::FileIOAll;
}
// Serialize a JSON value to a single compact line: no indentation, no
// newlines, no space after colons, and 6-digit decimal precision.
Json::String ToCompactString(const Json::Value& aJsonValue) {
  Json::StreamWriterBuilder writerBuilder;
  // Timestamps are in ms with ns precision, so 6 digits after the decimal
  // point are sufficient.
  writerBuilder["precision"] = 6;
  writerBuilder["precisionType"] = "decimal";
  // No indentation, and no newlines.
  writerBuilder["indentation"] = "";
  // This removes spaces after colons.
  writerBuilder["enableYAMLCompatibility"] = false;
  return Json::writeString(writerBuilder, aJsonValue);
}
MOZ_RUNINIT
/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
ProfilingLog::gMutex;
MOZ_RUNINIT
/* static */ mozilla::UniquePtr<Json::Value> ProfilingLog::gLog;
/* static */ void ProfilingLog::Init() {
  mozilla::baseprofiler::detail::BaseProfilerAutoLock guard{gMutex};
  MOZ_ASSERT(!gLog);
  gLog = mozilla::MakeUniqueFallible<Json::Value>(Json::objectValue);
  if (!gLog) {
    // Fallible allocation failed; the log simply stays disabled.
    return;
  }
  // Record when logging began, as the first entry.
  (*gLog)[Json::StaticString{"profilingLogBegin" TIMESTAMP_JSON_SUFFIX}] =
      ProfilingLog::Timestamp();
}
/* static */ void ProfilingLog::Destroy() {
  // Tear down the log under the same mutex that guards all log access.
  mozilla::baseprofiler::detail::BaseProfilerAutoLock guard{gMutex};
  MOZ_ASSERT(gLog);
  gLog = nullptr;
}
// True if the calling thread currently holds the ProfilingLog mutex. Used by
// PSAutoLock's debug assertions to enforce lock ordering.
/* static */ bool ProfilingLog::IsLockedOnCurrentThread() {
  return gMutex.IsLockedOnCurrentThread();
}
// RAII class to lock the profiler mutex.
// It provides a mechanism to determine if it is locked or not in order for
// memory hooks to avoid re-entering the profiler locked state.
// Locking order: Profiler, ThreadRegistry, ThreadRegistration.
class MOZ_RAII PSAutoLock {
 public:
  // The immediately-invoked lambda runs the lock-ordering assertions *before*
  // gPSMutex is handed to mLock (and therefore before it is acquired).
  PSAutoLock()
      : mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& {
          // In DEBUG builds, *before* we attempt to lock gPSMutex, we want to
          // check that the ThreadRegistry, ThreadRegistration, and
          // ProfilingLog mutexes are *not* locked on this thread, to avoid
          // inversion deadlocks.
          MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread());
          MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
          MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread());
          return gPSMutex;
        }()) {}

  // Not copyable nor assignable: exactly one lock per scope.
  PSAutoLock(const PSAutoLock&) = delete;
  void operator=(const PSAutoLock&) = delete;

  // True if the calling thread currently holds the profiler mutex; memory
  // hooks query this to avoid re-entering profiler-locked code.
  static bool IsLockedOnCurrentThread() {
    return gPSMutex.IsLockedOnCurrentThread();
  }

 private:
  static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex;
  mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock;
};
MOZ_RUNINIT
/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
PSAutoLock::gPSMutex{
"Gecko Profiler mutex"};
// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
#define PS_GET(type_, name_) \
static type_ name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return sInstance->m
##name_; \
}
#define PS_GET_LOCKLESS(type_, name_) \
static type_ name_() { \
MOZ_ASSERT(sInstance); \
return sInstance->m
##name_; \
}
#define PS_GET_AND_SET(type_, name_) \
PS_GET(type_, name_) \
static void Set
##name_(PSLockRef, type_ a
##name_) { \
MOZ_ASSERT(sInstance); \
sInstance->m
##name_ = a
##name_; \
}
static constexpr size_t MAX_JS_FRAMES =
mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES;
using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame;
using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer;
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// ASYNC POSIX SIGNAL HANDLING SUPPORT
//
// Integrating POSIX signals
// (https://man7.org/linux/man-pages/man7/signal.7.html) into a complex
// multi-threaded application such as Firefox can be a tricky proposition.
// Signals are delivered by the operating system to a program, which then
// invokes a signal handler
// (https://man7.org/linux/man-pages/man2/sigaction.2.html) outside the normal
// flow of control. This handler is responsible for performing operations in
// response to the signal. If there is no "custom" handler defined, then default
// behaviour is triggered, which usually results in a terminated program.
//
// As signal handlers interrupt the normal flow of control, Firefox may not be
// in a safe state while the handler is running (e.g. it may be halfway through
// a garbage collection cycle, or a critical lock may be held by the current
// thread). This is something we must be aware of while writing one, and we are
// additionally limited in terms of which POSIX functions we can call to those
// which are async signal safe
// (https://man7.org/linux/man-pages/man7/signal-safety.7.html).
//
// In the context of Firefox, this presents a number of details that we must be
// aware of:
//
// * We are very limited by what we can call when we handle a signal: Many
// functions in Firefox, and in the profiler specifically, allocate memory
// when called. Allocating memory is specifically **not** async-signal-safe,
// and so any functions that allocate should not be called from a signal
// handler.
//
// * We need to be careful with how we communicate to other threads in the
// process. The signal handler runs asynchronously, interrupting the current
// thread of execution. Communication should therefore use atomics or other
// concurrency constructs to ensure that data is read and written correctly.
// We should avoid taking locks, as we may easily deadlock while within the
// signal handler.
//
// * We cannot use the usual Firefox mechanisms for triggering behaviour in
// other threads. For instance, tools such as ``NS_DispatchToMainThread``
// allocate memory when called, which is not allowed within a signal handler.
//
// We solve these constraints by introducing a new thread within the Firefox
// profiler, the AsyncSignalControlThread which is responsible for carrying out
// the actions triggered by a signal handler. We communicate between handlers
// and this thread with the use of a libc pipe
// (https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html#tag_16_685_08).
// Writing to a pipe is async-signal-safe, so we can do so from a signal
// handler, and we can set the pipe to be "blocking", meaning that when our
// control thread tries to read it will block at the OS level (consuming no CPU)
// until the handler writes to it. This is in contrast to (e.g.) an atomic
// variable, where our thread would have to "busy wait" for it to be set.
//
// We have one "control" thread per process, and use a single byte for messages
// we send. Writes to pipes are atomic if the size is less than or equal to
// ``PIPE_BUF``, which (although implementation defined) in our case is always
// one, thus trivially atomic.
//
// The control flow for a typical Firefox session in which a user starts and
// stops profiling using POSIX signals therefore looks something like the
// following:
//
// * Profiler initialization.
//
// * The main thread of each process starts the signal control thread, and
// initialises signal handlers for ``SIGUSR1`` and ``SIGUSR2``.
// * The signal control thread sets up pipes for communication, and begins
// reading, blocking itself.
//
// * *After some time...*
// * The user sends ``SIGUSR1`` to Firefox, e.g. using ``kill -s USR1 <firefox
// pid>``
//
// * The profiler_start_signal_handler signal handler for ``SIGUSR1`` is
// triggered by the operating system. This writes the "start" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "start" character, and starts the
// profiler with a set of default presets.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user uses Firefox, or waits for it to do something...*
// * The user sends ``SIGUSR2`` to Firefox, e.g. using ``kill -s USR2 <firefox
// pid>``
//
// * The profiler_stop_signal_handler signal handler for ``SIGUSR2`` is
// triggered by the operating system. This writes the "stop" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "stop" character, and calls
// profiler_dump_and_stop to dump the profile to disk and stop the profiler.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user can now start another profiling session...*
//
// Forward declare this, so we can call it from the constructor.
static void* AsyncSignalControlThreadEntry(
void* aArg);
// Define our platform specific async (posix) signal control thread here.
class AsyncSignalControlThread {
public:
AsyncSignalControlThread() : mThread() {
// Try to open a pipe for this to communicate with. If we can't do this,
// then we give up and return, as there's no point continuing without
// being able to communicate
int pipeFds[2];
if (pipe(pipeFds)) {
LOG(
"Profiler AsyncSignalControlThread failed to create a pipe.");
return;
}
// Close this pipe on calls to exec().
fcntl(pipeFds[0], F_SETFD, FD_CLOEXEC);
fcntl(pipeFds[1], F_SETFD, FD_CLOEXEC);
// Write the reading side to mFd, and the writing side to the global atomic
mFd = pipeFds[0];
sAsyncSignalControlWriteFd = pipeFds[1];
// We don't really care about stack size, as it should be minimal, so
// leave the pthread attributes as a nullptr, i.e. choose the default.
pthread_attr_t* attr_ptr = nullptr;
if (pthread_create(&mThread, attr_ptr, AsyncSignalControlThreadEntry,
this) != 0) {
MOZ_CRASH(
"pthread_create failed");
}
};
~AsyncSignalControlThread() {
// Derived from code in nsDumpUtils.cpp. Comment reproduced here for
// poisterity: Close sAsyncSignalControlWriteFd /after/ setting the fd to
// -1. Otherwise we have the (admittedly far-fetched) race where we
//
// 1) close sAsyncSignalControlWriteFd
// 2) open a new fd with the same number as sAsyncSignalControlWriteFd
// had.
// 3) receive a signal, then write to the fd.
int asyncSignalControlWriteFd = sAsyncSignalControlWriteFd.exchange(-1);
// This will unblock the "read" in StartWatching.
close(asyncSignalControlWriteFd);
// Finally, exit the thread.
pthread_join(mThread, nullptr);
};
void Watch() {
char msg[1];
ssize_t nread;
while (
true) {
// Try reading from the pipe. This will block until something is written:
nread = read(mFd, msg,
sizeof(msg));
if (nread == -1 && errno == EINTR) {
// nread == -1 and errno == EINTR means that `read` was interrupted
// by a signal before reading any data. This is likely because the
// profiling thread interrupted us (with SIGPROF). We can safely ignore
// this and "go around" the loop again to try and read.
continue;
}
if (nread == -1 && errno != EINTR) {
// nread == -1 and errno != EINTR means that `read` has failed in some
// way that we can't recover from. In this case, all we can do is give
// up, and quit the watcher, as the pipe is likely broken.
LOG(
"Error (%d) when reading in AsyncSignalControlThread", errno);
return;
}
if (nread == 0) {
// nread == 0 signals that the other end of the pipe has been cleanly
// closed. Close our end, and exit the reading loop.
close(mFd);
return;
}
// If we reach here, nread != 0 and nread != -1. This means that we
// should have read at least one byte, which should be a control byte
// for the profiler.
// It *might* happen that `read` is interrupted by the sampler thread
// after successfully reading. If this occurs, read returns the number
// of bytes read. As anything other than 1 is wrong for us, we can
// always assume that we can read whatever `read` read.
MOZ_RELEASE_ASSERT(nread == 1);
if (msg[0] == sAsyncSignalControlCharStart) {
// Check to see if the profiler is already running. This is done within
// `profiler_start` anyway, but if we check sooner we avoid running all
// the other code between now and that check.
if (!profiler_is_active()) {
profiler_start_from_signal();
}
}
else if (msg[0] == sAsyncSignalControlCharStop) {
// Check to see whether the profiler is even running before trying to
// stop the profiler. Most other methods of stopping the profiler (i.e.
// those through nsProfiler etc) already know whether or not the
// profiler is running, so don't try and stop it if it's already
// running. Signal-stopping doesn't have this constraint, so we should
// check just in case there is a codepath followed by
// `profiler_dump_and_stop` that breaks if we stop while stopped.
if (profiler_is_active()) {
profiler_dump_and_stop();
}
}
else {
LOG(
"AsyncSignalControlThread recieved unknown control signal: %c",
msg[0]);
}
}
};
private:
// The read side of the pipe that we use to communicate from a signal handler
// to the AsyncSignalControlThread
int mFd;
// The thread handle for the async signal control thread
// Note, that unlike the sampler thread, this is currently a posix-only
// feature. Therefore, we don't bother to have a windows equivalent - we
// just use a pthread_t
pthread_t mThread;
};
static void* AsyncSignalControlThreadEntry(
void* aArg) {
NS_SetCurrentThreadName(
"AsyncSignalControlThread");
auto* thread =
static_cast<AsyncSignalControlThread*>(aArg);
thread->Watch();
return nullptr;
}
#endif
// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
class CorePS {
private:
#ifdef MOZ_PERFETTO
class PerfettoObserver :
public perfetto::TrackEventSessionObserver {
public:
PerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(
this); }
~PerfettoObserver() { perfetto::TrackEvent::RemoveSessionObserver(
this); }
void OnStart(
const perfetto::DataSourceBase::StartArgs&) override {
mozilla::profiler::detail::RacyFeatures::SetPerfettoTracingActive();
}
void OnStop(
const perfetto::DataSourceBase::StopArgs&) override {
mozilla::profiler::detail::RacyFeatures::SetPerfettoTracingInactive();
}
} perfettoObserver;
#endif
// Private constructor: CorePS is a singleton, created only via Create().
// mProcessStartTime is captured here and is immutable afterwards; the
// optional subsystem pointers start null and are filled in lazily.
CorePS()
    : mProcessStartTime(TimeStamp::ProcessCreation()),
      mMaybeBandwidthCounter(nullptr)
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
      ,
      mAsyncSignalControlThread(nullptr)
#endif
#ifdef USE_LUL_STACKWALK
      ,
      mLul(nullptr)
#endif
{
  MOZ_ASSERT(NS_IsMainThread(),
             "CorePS must be created from the main thread");
}
// Destructor: frees the optional subsystems owned by CorePS.
~CorePS() {
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
  delete mAsyncSignalControlThread;
#endif
#ifdef USE_LUL_STACKWALK
  delete sInstance->mLul;
#endif
  // The bandwidth counter exists on every platform, so it must be deleted
  // unconditionally. It was previously deleted only under USE_LUL_STACKWALK,
  // which leaked it on non-LUL builds.
  delete mMaybeBandwidthCounter;
}
public:
// Instantiate the CorePS singleton. Must be called at most once, with the
// profiler mutex held (proven by the PSLockRef argument).
static void Create(PSLockRef aLock) {
  MOZ_ASSERT(!sInstance);
  sInstance = new CorePS();
}
// Tear down the CorePS singleton, with the profiler mutex held. After this,
// Exists() returns false.
static void Destroy(PSLockRef aLock) {
  MOZ_ASSERT(sInstance);
  delete sInstance;
  sInstance = nullptr;
}
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
// being locked. This is because CorePS is instantiated so early on the main
// thread that we don't have to worry about it being racy.
static bool Exists() { return !!sInstance; }
// Accumulate memory measurements: CorePS itself, the thread registry, and
// each registered page go into aProfSize; the LUL unwinder data (when LUL
// stackwalking is compiled in) goes into aLulSize.
static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                      size_t& aProfSize, size_t& aLulSize) {
  MOZ_ASSERT(sInstance);
  aProfSize += aMallocSizeOf(sInstance);
  aProfSize += ThreadRegistry::SizeOfIncludingThis(aMallocSizeOf);
  for (auto& registeredPage : sInstance->mRegisteredPages) {
    aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
  }
  // Measurement of the following things may be added later if DMD finds it
  // is worthwhile:
  // - CorePS::mRegisteredPages itself (its elements' children are
  //   measured above)
#if defined(USE_LUL_STACKWALK)
  if (lul::LUL* lulPtr = sInstance->mLul; lulPtr) {
    aLulSize += lulPtr->SizeOfIncludingThis(aMallocSizeOf);
  }
#endif
}
// No PSLockRef is needed for this field because it's immutable.
PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
PS_GET(JsFrameBuffer&, JsFrames)
PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
// Register a page, taking ownership of aRegisteredPage. If an equal page is
// already registered: an "about:blank" entry is replaced (it was just the
// placeholder first-load of its BrowsingContext), anything else is a
// duplicate and is ignored.
static void AppendRegisteredPage(PSLockRef,
                                 RefPtr<PageInformation>&& aRegisteredPage) {
  MOZ_ASSERT(sInstance);
  // Look for an already-registered page equal to the incoming one.
  PageInformation* newPage = aRegisteredPage.get();
  auto foundPageIter = std::find_if(
      sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
      [newPage](PageInformation* aPage) { return newPage->Equals(aPage); });
  if (foundPageIter != sInstance->mRegisteredPages.end()) {
    if (!(*foundPageIter)->Url().EqualsLiteral("about:blank")) {
      // Do not register the same page again.
      return;
    }
    // When a BrowsingContext is loaded, the first url loaded in it will be
    // about:blank, and if the principal matches, the first document loaded
    // in it will share an inner window. That's why we should delete the
    // intermittent about:blank if they share the inner window.
    sInstance->mRegisteredPages.erase(foundPageIter);
  }
  MOZ_RELEASE_ASSERT(
      sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
}
// Remove the registered page matching the given inner window ID, if any.
static void RemoveRegisteredPage(PSLockRef,
                                 uint64_t aRegisteredInnerWindowID) {
  MOZ_ASSERT(sInstance);
  // Remove RegisteredPage from mRegisteredPages by given inner window ID.
  // Note: the type name `PageInformation` was previously broken across two
  // lines ("PageInform"/"ation"), which does not compile; rejoined here.
  sInstance->mRegisteredPages.eraseIf(
      [&](const RefPtr<PageInformation>& rd) {
        return rd->InnerWindowID() == aRegisteredInnerWindowID;
      });
}
// Drop all registered pages (e.g. at profiler shutdown).
static void ClearRegisteredPages(PSLockRef) {
  MOZ_ASSERT(sInstance);
  sInstance->mRegisteredPages.clear();
}
PS_GET(const Vector<BaseProfilerCount*>&, Counters)
// Register a counter for sampling. CorePS stores only a non-owning pointer;
// the caller must keep the counter alive until RemoveCounter().
static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
  MOZ_ASSERT(sInstance);
  // we don't own the counter; they may be stored in static objects
  MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
}
// Unregister a previously-added counter. Tolerates being called after the
// CorePS singleton is gone (late shutdown), but asserts the counter was
// actually registered when the singleton still exists.
static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
  // we may be called to remove a counter after the profiler is stopped or
  // late in shutdown.
  if (!sInstance) {
    return;
  }
  auto* found = std::find(sInstance->mCounters.begin(),
                          sInstance->mCounters.end(), aCounter);
  MOZ_RELEASE_ASSERT(found != sInstance->mCounters.end());
  sInstance->mCounters.erase(found);
}
#ifdef USE_LUL_STACKWALK
// Returns the LUL stackwalker state; null before SetLul() is first called.
static lul::LUL* Lul() {
  MOZ_RELEASE_ASSERT(sInstance);
  return sInstance->mLul;
}
// Install the LUL instance; ownership transfers to CorePS. May only happen
// once: the compareExchange release-asserts that mLul was still null.
static void SetLul(UniquePtr<lul::LUL> aLul) {
  MOZ_RELEASE_ASSERT(sInstance);
  MOZ_RELEASE_ASSERT(
      sInstance->mLul.compareExchange(nullptr, aLul.release()));
}
#endif
PS_GET_AND_SET(const nsACString&, ProcessName)
PS_GET_AND_SET(const nsACString&, ETLDplus1)
#if !defined(XP_WIN)
PS_GET_AND_SET(const Maybe<nsCOMPtr<nsIFile>>&, AsyncSignalDumpDirectory)
#endif
// Record the bandwidth counter used by the Bandwidth feature. Stored in
// CorePS (not ActivePS) so it can outlive a profiling session; see the
// comment in ActivePS::Destroy for why.
static void SetBandwidthCounter(ProfilerBandwidthCounter* aBandwidthCounter) {
  MOZ_ASSERT(sInstance);
  sInstance->mMaybeBandwidthCounter = aBandwidthCounter;
}
// Returns the bandwidth counter, or null if none was ever set.
static ProfilerBandwidthCounter* GetBandwidthCounter() {
  MOZ_ASSERT(sInstance);
  return sInstance->mMaybeBandwidthCounter;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Record the background thread used to communicate with async signal
// handlers. Only a raw pointer is stored here.
static void SetAsyncSignalControlThread(
    AsyncSignalControlThread* aAsyncSignalControlThread) {
  MOZ_ASSERT(sInstance);
  sInstance->mAsyncSignalControlThread = aAsyncSignalControlThread;
}
#endif
private:
// The singleton instance
static CorePS* sInstance;
// The time that the process started.
const TimeStamp mProcessStartTime;
// Network bandwidth counter for the Bandwidth feature.
ProfilerBandwidthCounter* mMaybeBandwidthCounter;
// Info on all the registered pages.
// InnerWindowIDs in mRegisteredPages are unique.
Vector<RefPtr<PageInformation>> mRegisteredPages;
// Non-owning pointers to all active counters
Vector<BaseProfilerCount*> mCounters;
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Background thread for communicating with async signal handlers
AsyncSignalControlThread* mAsyncSignalControlThread;
#endif
#ifdef USE_LUL_STACKWALK
// LUL's state. Null prior to the first activation, non-null thereafter.
// Owned by this CorePS.
mozilla::Atomic<lul::LUL*> mLul;
#endif
// Process name, provided by child process initialization code.
nsAutoCString mProcessName;
// Private name, provided by child process initialization code (eTLD+1 in
// fission)
nsAutoCString mETLDplus1;
// This memory buffer is used by the MergeStacks mechanism. Previously it was
// stack allocated, but this led to a stack overflow, as it was too much
// memory. Here the buffer can be pre-allocated, and shared with the
// MergeStacks feature as needed. MergeStacks is only run while holding the
// lock, so it is safe to have only one instance allocated for all of the
// threads.
JsFrameBuffer mJsFrames;
// Cached download directory for when we need to dump profiles to disk.
#if !defined(XP_WIN)
Maybe<nsCOMPtr<nsIFile>> mAsyncSignalDumpDirectory;
#endif
};
CorePS* CorePS::sInstance = nullptr;
// Register a counter with CorePS while gPSMutex is already held.
void locked_profiler_add_sampled_counter(PSLockRef aLock,
                                         BaseProfilerCount* aCounter) {
  CorePS::AppendCounter(aLock, aCounter);
}
// Unregister a counter from CorePS while gPSMutex is already held.
void locked_profiler_remove_sampled_counter(PSLockRef aLock,
                                            BaseProfilerCount* aCounter) {
  // Note: we don't enforce a final sample, though we could do so if the
  // profiler was active
  CorePS::RemoveCounter(aLock, aCounter);
}
class SamplerThread;
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
double aInterval, uint32_t aFeatures);
// Owning wrapper for the profiling data of one still-registered thread; held
// in ActivePS::mLiveProfiledThreads.
struct LiveProfiledThreadData {
  UniquePtr<ProfiledThreadData> mProfiledThreadData;
};
// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS {
private:
// Compute the size of one profile buffer chunk for the requested entry
// count: total bytes divided into at least scMinimumNumberOfChunks chunks,
// capped at scMaximumChunkSize.
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
  const size_t perChunk = size_t(ClampToAllowedEntries(aEntries)) *
                          scBytesPerEntry / scMinimumNumberOfChunks;
  return uint32_t(std::min(perChunk, size_t(scMaximumChunkSize)));
}
// Normalize the requested feature set: drop unavailable features, expand
// implied features, and apply Tracing-mode overrides. The order of the
// adjustments below matters (e.g. Tracing clears CPUUtilization even if
// CPUAllThreads implied it just above).
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
  // Filter out any features unavailable in this platform/configuration.
  aFeatures &= AvailableFeatures();
  // Some features imply others.
  if (aFeatures & ProfilerFeature::FileIOAll) {
    aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
  } else if (aFeatures & ProfilerFeature::FileIO) {
    aFeatures |= ProfilerFeature::MainThreadIO;
  }
  if (aFeatures & ProfilerFeature::CPUAllThreads) {
    aFeatures |= ProfilerFeature::CPUUtilization;
  }
  // Tracing mode records markers/JS only: no stack sampling, no CPU or
  // memory counters.
  if (aFeatures & ProfilerFeature::Tracing) {
    aFeatures &= ~ProfilerFeature::CPUUtilization;
    aFeatures &= ~ProfilerFeature::Memory;
    aFeatures |= ProfilerFeature::NoStackSampling;
    aFeatures |= ProfilerFeature::JS;
  }
  return aFeatures;
}
bool ShouldInterposeIOs() {
return ProfilerFeature::HasMainThreadIO(mFeatures) ||
ProfilerFeature::HasFileIO(mFeatures) ||
ProfilerFeature::HasFileIOAll(mFeatures);
}
// Construct the active-profiling state. Called with gPSMutex held.
// NOTE(review): members are initialized in declaration order; the
// mProfileBuffer initializer dereferences mProfileBufferChunkManager, so the
// chunk manager member must be declared before the buffer — confirm against
// the member declarations.
ActivePS(
    PSLockRef aLock, const TimeStamp& aProfilingStartTime,
    PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
    const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
    const Maybe<double>& aDuration,
    UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull)
    : mProfilingStartTime(aProfilingStartTime),
      mGeneration(sNextGeneration++),
      mCapacity(aCapacity),
      mDuration(aDuration),
      mInterval(aInterval),
      mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
      mActiveTabID(aActiveTabID),
      // Use the caller-provided chunk manager if any, otherwise create one
      // sized from the requested capacity.
      mProfileBufferChunkManager(
          aChunkManagerOrNull
              ? std::move(aChunkManagerOrNull)
              : MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
                    size_t(ClampToAllowedEntries(aCapacity.Value())) *
                        scBytesPerEntry,
                    ChunkSizeForEntries(aCapacity.Value()))),
      // Attach our chunk manager to the process-wide core buffer.
      mProfileBuffer([this]() -> ProfileChunkedBuffer& {
        ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer();
        coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager);
        return coreBuffer;
      }()),
      mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
                                  ? new ProcessCPUCounter(aLock)
                                  : nullptr),
      mMaybePowerCounters(nullptr),
      mMaybeCPUFreq(nullptr),
      // The new sampler thread doesn't start sampling immediately because the
      // main loop within Run() is blocked until this function's caller
      // unlocks gPSMutex.
      mSamplerThread(
          NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
      mIsPaused(false),
      mIsSamplingPaused(false) {
  ProfilingLog::Init();
  // Deep copy and lower-case aFilters.
  MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
  MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount));
  for (uint32_t i = 0; i < aFilterCount; ++i) {
    mFilters[i] = aFilters[i];
    mFiltersLowered[i].reserve(mFilters[i].size());
    std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
                   std::back_inserter(mFiltersLowered[i]), ::tolower);
  }
#if !defined(RELEASE_OR_BETA)
  if (ShouldInterposeIOs()) {
    // We need to register the observer on the main thread, because we want
    // to observe IO that happens on the main thread.
    // IOInterposer needs to be initialized before calling
    // IOInterposer::Register or our observer will be silently dropped.
    if (NS_IsMainThread()) {
      IOInterposer::Init();
      IOInterposer::Register(IOInterposeObserver::OpAll,
                             &ProfilerIOInterposeObserver::GetInstance());
    } else {
      NS_DispatchToMainThread(
          NS_NewRunnableFunction("ActivePS::ActivePS", []() {
            // Note: This could theoretically happen after ActivePS gets
            // destroyed, but it's ok:
            // - The Observer always checks that the profiler is (still)
            //   active before doing its work.
            // - The destruction should happen on the same thread as this
            //   construction, so the un-registration will also be dispatched
            //   and queued on the main thread, and run after this.
            IOInterposer::Init();
            IOInterposer::Register(
                IOInterposeObserver::OpAll,
                &ProfilerIOInterposeObserver::GetInstance());
          }));
    }
  }
#endif
  // Power counters are created in the body (not the init list) so they can
  // be registered with the sampled-counter list under the same lock.
  if (ProfilerFeature::HasPower(aFeatures)) {
    mMaybePowerCounters = new PowerCounters();
    for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) {
      locked_profiler_add_sampled_counter(aLock, powerCounter.get());
    }
  }
  if (ProfilerFeature::HasCPUFrequency(aFeatures)) {
    mMaybeCPUFreq = new ProfilerCPUFreq();
  }
}
// Destructor. The sampled counters must already have been removed and
// deleted (see ActivePS::Destroy), because removing them needs the profiler
// lock, which cannot be taken here.
~ActivePS() {
  MOZ_ASSERT(
      !mMaybeProcessCPUCounter,
      "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
  MOZ_ASSERT(
      !mMaybePowerCounters,
      "mMaybePowerCounters should have been deleted before ~ActivePS()");
  MOZ_ASSERT(!mMaybeCPUFreq,
             "mMaybeCPUFreq should have been deleted before ~ActivePS()");
#if defined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY)
  MOZ_ASSERT(!mMemoryCounter,
             "mMemoryCounter should have been deleted before ~ActivePS()");
#endif
#if !defined(RELEASE_OR_BETA)
  if (ShouldInterposeIOs()) {
    // We need to unregister the observer on the main thread, because that's
    // where we've registered it.
    if (NS_IsMainThread()) {
      IOInterposer::Unregister(IOInterposeObserver::OpAll,
                               &ProfilerIOInterposeObserver::GetInstance());
    } else {
      NS_DispatchToMainThread(
          NS_NewRunnableFunction("ActivePS::~ActivePS", []() {
            IOInterposer::Unregister(
                IOInterposeObserver::OpAll,
                &ProfilerIOInterposeObserver::GetInstance());
          }));
    }
  }
#endif
  if (mProfileBufferChunkManager) {
    // We still control the chunk manager, remove it from the core buffer.
    profiler_get_core_buffer().ResetChunkManager();
  }
  ProfilingLog::Destroy();
}
// Returns true if a thread with the given name matches the current filter
// list (empty list, "*", case-insensitive substring, or a "pid:<my pid>"
// filter all select the thread).
bool ThreadSelected(const char* aThreadName) {
  // An empty filter list selects every thread.
  if (mFiltersLowered.empty()) {
    return true;
  }
  std::string lowered = aThreadName;
  std::transform(lowered.begin(), lowered.end(), lowered.begin(), ::tolower);
  return std::any_of(
      mFiltersLowered.begin(), mFiltersLowered.end(),
      [&lowered](const std::string& filter) {
        if (filter == "*") {
          return true;
        }
        // Crude, non UTF-8 compatible, case insensitive substring search
        if (lowered.find(filter) != std::string::npos) {
          return true;
        }
        // If the filter is "pid:<my pid>", profile all threads.
        return mozilla::profiler::detail::FilterHasPid(filter.c_str());
      });
}
public:
// Instantiate the ActivePS singleton, i.e. start a profiling session.
// Must be called with gPSMutex held, at most once between Destroy() calls.
static void Create(
    PSLockRef aLock, const TimeStamp& aProfilingStartTime,
    PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
    const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
    const Maybe<double>& aDuration,
    UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull) {
  MOZ_ASSERT(!sInstance);
  sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval,
                           aFeatures, aFilters, aFilterCount, aActiveTabID,
                           aDuration, std::move(aChunkManagerOrNull));
}
// Tear down the profiling session: unregister and delete all sampled
// counters (which requires aLock), then delete the instance. Returns the
// SamplerThread, which the caller must dispose of after releasing the lock
// (the sampler loop itself takes gPSMutex).
[[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
  MOZ_ASSERT(sInstance);
  if (sInstance->mMaybeProcessCPUCounter) {
    locked_profiler_remove_sampled_counter(
        aLock, sInstance->mMaybeProcessCPUCounter);
    delete sInstance->mMaybeProcessCPUCounter;
    sInstance->mMaybeProcessCPUCounter = nullptr;
  }
  if (sInstance->mMaybePowerCounters) {
    for (const auto& powerCounter :
         sInstance->mMaybePowerCounters->GetCounters()) {
      locked_profiler_remove_sampled_counter(aLock, powerCounter.get());
    }
    delete sInstance->mMaybePowerCounters;
    sInstance->mMaybePowerCounters = nullptr;
  }
  if (sInstance->mMaybeCPUFreq) {
    delete sInstance->mMaybeCPUFreq;
    sInstance->mMaybeCPUFreq = nullptr;
  }
#if defined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY)
  if (sInstance->mMemoryCounter) {
    locked_profiler_remove_sampled_counter(aLock,
                                           sInstance->mMemoryCounter.get());
    sInstance->mMemoryCounter = nullptr;
  }
#endif
  ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter();
  if (counter && counter->IsRegistered()) {
    // Because profiler_count_bandwidth_bytes does a racy
    // profiler_feature_active check to avoid taking the lock,
    // free'ing the memory of the counter would be crashy if the
    // socket thread attempts to increment the counter while we are
    // stopping the profiler.
    // Instead, we keep the counter in CorePS and only mark it as
    // unregistered so that the next attempt to count bytes
    // will re-register it.
    locked_profiler_remove_sampled_counter(aLock, counter);
    counter->MarkUnregistered();
  }
  // Grab the sampler thread before deleting the instance that owns the
  // pointer; it is returned to the caller for shutdown outside the lock.
  auto samplerThread = sInstance->mSamplerThread;
  delete sInstance;
  sInstance = nullptr;
  return samplerThread;
}
static bool Exists(PSLockRef) { return !!sInstance; }
// Returns true if the active session was started with exactly these
// settings (capacity, duration, interval, features, filters, tab ID).
static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
                   const Maybe<double>& aDuration, double aInterval,
                   uint32_t aFeatures, const char** aFilters,
                   uint32_t aFilterCount, uint64_t aActiveTabID) {
  MOZ_ASSERT(sInstance);
  // Cheap scalar comparisons first.
  if (sInstance->mCapacity != aCapacity ||
      sInstance->mDuration != aDuration ||
      sInstance->mInterval != aInterval ||
      sInstance->mFeatures != aFeatures ||
      sInstance->mFilters.length() != aFilterCount ||
      sInstance->mActiveTabID != aActiveTabID) {
    return false;
  }
  // Then compare each filter string (counts are equal at this point).
  for (uint32_t i = 0; i < aFilterCount; ++i) {
    if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
      return false;
    }
  }
  return true;
}
// Memory-reporter size of the active state, including the profile buffer.
static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
  MOZ_ASSERT(sInstance);
  size_t n = aMallocSizeOf(sInstance);
  n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
  // Measurement of the following members may be added later if DMD finds it
  // is worthwhile:
  // - mLiveProfiledThreads (both the array itself, and the contents)
  // - mDeadProfiledThreads (both the array itself, and the contents)
  //
  return n;
}
// Decide what gets recorded for the given thread: everything if the thread
// matches the user's filters, otherwise only the "all threads" feature
// subsets (CPU, sampling, markers) that are currently enabled.
static ThreadProfilingFeatures ProfilingFeaturesForThread(
    PSLockRef aLock, const ThreadRegistrationInfo& aInfo) {
  MOZ_ASSERT(sInstance);
  if (sInstance->ThreadSelected(aInfo.Name())) {
    // This thread was selected by the user, record everything.
    return ThreadProfilingFeatures::Any;
  }
  ThreadProfilingFeatures granted = ThreadProfilingFeatures::NotProfiled;
  if (ActivePS::FeatureCPUAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::CPUUtilization);
  }
  if (ActivePS::FeatureSamplingAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::Sampling);
  }
  if (ActivePS::FeatureMarkersAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::Markers);
  }
  return granted;
}
[[nodiscard]] static bool AppendPostSamplingCallback(
PSLockRef, PostSamplingCallback&& aCallback);
// Writes out the current active configuration of the profile.
// Emits a JSON null when the profiler is inactive, otherwise an object with
// "features", "threads" (filters), and the simple scalar settings.
static void WriteActiveConfiguration(
    PSLockRef aLock, JSONWriter& aWriter,
    const Span<const char>& aPropertyName = MakeStringSpan("")) {
  const bool named = !aPropertyName.empty();
  if (!sInstance) {
    if (named) {
      aWriter.NullProperty(aPropertyName);
    } else {
      aWriter.NullElement();
    }
    return;
  }
  if (named) {
    aWriter.StartObjectProperty(aPropertyName);
  } else {
    aWriter.StartObjectElement();
  }
  {
    aWriter.StartArrayProperty("features");
#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \
  if (profiler_feature_active(ProfilerFeature::Name_)) { \
    aWriter.StringElement(str_); \
  }
    PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
#undef WRITE_ACTIVE_FEATURES
    aWriter.EndArray();
  }
  {
    aWriter.StartArrayProperty("threads");
    for (const auto& filter : sInstance->mFilters) {
      aWriter.StringElement(filter);
    }
    aWriter.EndArray();
  }
  {
    // Now write all the simple values.
    // The interval is also available on profile.meta.interval
    aWriter.DoubleProperty("interval", sInstance->mInterval);
    aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
    if (sInstance->mDuration) {
      aWriter.DoubleProperty("duration", sInstance->mDuration.value());
    }
    // Here, we are converting uint64_t to double. Tab IDs are
    // being created using `nsContentUtils::GenerateProcessSpecificId`, which
    // is specifically designed to only use 53 of the 64 bits to be lossless
    // when passed into and out of JS as a double.
    aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
  }
  aWriter.EndObject();
}
PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime)
PS_GET(uint32_t, Generation)
PS_GET(PowerOfTwo32, Capacity)
PS_GET(Maybe<double>, Duration)
PS_GET(double, Interval)
PS_GET(uint32_t, Features)
PS_GET(uint64_t, ActiveTabID)
#define PS_GET_FEATURE(n_, str_, Name_, desc_) \
static bool Feature##Name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
}
PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
#undef PS_GET_FEATURE
// Whether the active feature set requires installing the memory hooks.
static bool ShouldInstallMemoryHooks(PSLockRef) {
  MOZ_ASSERT(sInstance);
  return ProfilerFeature::ShouldInstallMemoryHooks(sInstance->mFeatures);
}
// Build the JS instrumentation flag word from the active JS features.
static uint32_t JSFlags(PSLockRef aLock) {
  uint32_t flags = 0;
  if (FeatureJS(aLock)) {
    flags |= uint32_t(JSInstrumentationFlags::StackSampling);
  }
  if (FeatureJSAllocations(aLock)) {
    flags |= uint32_t(JSInstrumentationFlags::Allocations);
  }
  return flags;
}
PS_GET(const Vector<std::string>&, Filters)
PS_GET(const Vector<std::string>&, FiltersLowered)
// Not using PS_GET, because only the "Controlled" interface of
// `mProfileBufferChunkManager` should be exposed here.
static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
    PSLockRef) {
  MOZ_ASSERT(sInstance);
  MOZ_ASSERT(sInstance->mProfileBufferChunkManager);
  return *sInstance->mProfileBufferChunkManager;
}
// Forward pending chunk requests to the chunk manager, if we still own one.
static void FulfillChunkRequests(PSLockRef) {
  MOZ_ASSERT(sInstance);
  if (sInstance->mProfileBufferChunkManager) {
    sInstance->mProfileBufferChunkManager->FulfillChunkRequests();
  }
}
// Access the session's ProfileBuffer. Requires gPSMutex.
static ProfileBuffer& Buffer(PSLockRef) {
  MOZ_ASSERT(sInstance);
  return sInstance->mProfileBuffer;
}
// The profiling data of all still-registered profiled threads.
static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
  MOZ_ASSERT(sInstance);
  return sInstance->mLiveProfiledThreads;
}
// One entry of the list returned by ProfiledThreads(): a profiled thread's
// registration time, its JS context (if still registered), and a weak
// pointer to its profiling data.
struct ProfiledThreadListElement {
  TimeStamp mRegisterTime;
  JSContext* mJSContext;  // Null for unregistered threads.
  ProfiledThreadData* mProfiledThreadData;
};
using ProfiledThreadList = Vector<ProfiledThreadListElement>;
// Returns a ProfiledThreadList with all threads that should be included in a
// profile, both for threads that are still registered, and for threads that
// have been unregistered but still have data in the buffer.
// The returned array is sorted by thread register time.
// Do not hold on to the return value past LockedRegistry.
static ProfiledThreadList ProfiledThreads(
    ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) {
  MOZ_ASSERT(sInstance);
  ProfiledThreadList array;
  // Reserve room for live + dead threads up front so appends cannot fail.
  MOZ_RELEASE_ASSERT(
      array.initCapacity(sInstance->mLiveProfiledThreads.length() +
                         sInstance->mDeadProfiledThreads.length()));
  for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) {
    ProfiledThreadData* profiledThreadData =
        offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData(
            aLock);
    if (!profiledThreadData) {
      // This thread was not profiled, continue with the next one.
      continue;
    }
    // Take the per-thread lock only long enough to read the JS context.
    ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
        offThreadRef.GetLockedRWFromAnyThread();
    MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
        profiledThreadData->Info().RegisterTime(),
        lockedThreadData->GetJSContext(), profiledThreadData}));
  }
  for (auto& t : sInstance->mDeadProfiledThreads) {
    MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
        t->Info().RegisterTime(), (JSContext*)nullptr, t.get()}));
  }
  std::sort(array.begin(), array.end(),
            [](const ProfiledThreadListElement& a,
               const ProfiledThreadListElement& b) {
              return a.mRegisterTime < b.mRegisterTime;
            });
  return array;
}
// Collect all pages relevant to the profile: those still registered in
// CorePS, plus pages already unregistered but whose data is still in range.
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
  MOZ_ASSERT(sInstance);
  Vector<RefPtr<PageInformation>> pages;
  for (const auto& page : CorePS::RegisteredPages(aLock)) {
    MOZ_RELEASE_ASSERT(pages.append(page));
  }
  for (const auto& page : sInstance->mDeadProfiledPages) {
    MOZ_RELEASE_ASSERT(pages.append(page));
  }
  // We don't need to sort the pages like threads since we won't show them
  // as a list.
  return pages;
}
// Take ownership of a new thread's profiling data and return a non-owning
// pointer to it (valid while the thread stays in the live/dead lists).
static ProfiledThreadData* AddLiveProfiledThread(
    PSLockRef, UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
  MOZ_ASSERT(sInstance);
  MOZ_RELEASE_ASSERT(sInstance->mLiveProfiledThreads.append(
      LiveProfiledThreadData{std::move(aProfiledThreadData)}));
  // Return a weak pointer to the ProfiledThreadData object.
  return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
}
// Move a thread's profiling data from the live list to the dead list (its
// buffered data may still be needed for the profile), recording the buffer
// position at which it was unregistered.
static void UnregisterThread(PSLockRef aLockRef,
                             ProfiledThreadData* aProfiledThreadData) {
  MOZ_ASSERT(sInstance);
  // Opportunistically prune dead threads whose data has scrolled out.
  DiscardExpiredDeadProfiledThreads(aLockRef);
  // Find the right entry in the mLiveProfiledThreads array and remove the
  // element, moving the ProfiledThreadData object for the thread into the
  // mDeadProfiledThreads array.
  for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
    LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
    if (thread.mProfiledThreadData == aProfiledThreadData) {
      thread.mProfiledThreadData->NotifyUnregistered(
          sInstance->mProfileBuffer.BufferRangeEnd());
      MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
          std::move(thread.mProfiledThreadData)));
      sInstance->mLiveProfiledThreads.erase(
          &sInstance->mLiveProfiledThreads[i]);
      return;
    }
  }
}
// This is a counter to collect process CPU utilization during profiling.
// It cannot be a raw `ProfilerCounter` because we need to manually add/remove
// it while the profiler lock is already held.
class ProcessCPUCounter final : public AtomicProfilerCount {
 public:
  // Registers itself as a sampled counter; requires the profiler lock.
  explicit ProcessCPUCounter(PSLockRef aLock)
      : AtomicProfilerCount("processCPU", &mCounter, nullptr, "CPU",
                            "Process CPU utilization") {
    // Adding on construction, so it's ready before the sampler starts.
    locked_profiler_add_sampled_counter(aLock, this);
    // Note: Removed from ActivePS::Destroy, because a lock is needed.
  }
  // Accumulate CPU usage (atomic; callable from any thread).
  void Add(int64_t aNumber) { mCounter += aNumber; }
 private:
  ProfilerAtomicSigned mCounter;
};
PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter);
PS_GET(PowerCounters*, MaybePowerCounters);
PS_GET(ProfilerCPUFreq*, MaybeCPUFreq);
PS_GET_AND_SET(bool, IsPaused)
// True if sampling is paused (through generic `SetIsPaused()` or specific
// `SetIsSamplingPaused()`).
static bool IsSamplingPaused(PSLockRef lock) {
  MOZ_ASSERT(sInstance);
  return IsPaused(lock) || sInstance->mIsSamplingPaused;
}
// Pause or resume only the periodic sampling (markers etc. keep flowing).
static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
  MOZ_ASSERT(sInstance);
  sInstance->mIsSamplingPaused = aIsSamplingPaused;
}
// Drop dead threads whose entire buffered data has already been overwritten
// (i.e. they were unregistered before the buffer's current range start).
static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
  MOZ_ASSERT(sInstance);
  uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
  // Discard any dead threads that were unregistered before bufferRangeStart.
  sInstance->mDeadProfiledThreads.eraseIf(
      [bufferRangeStart](
          const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
        Maybe<uint64_t> bufferPosition =
            aProfiledThreadData->BufferPositionWhenUnregistered();
        MOZ_RELEASE_ASSERT(bufferPosition,
                           "should have unregistered this thread");
        return *bufferPosition < bufferRangeStart;
      });
}
// Move pages with the given inner window ID from CorePS's registered list to
// this session's dead list, recording the buffer position at unregistration.
static void UnregisterPage(PSLockRef aLock,
                           uint64_t aRegisteredInnerWindowID) {
  MOZ_ASSERT(sInstance);
  auto& registeredPages = CorePS::RegisteredPages(aLock);
  for (size_t i = 0; i < registeredPages.length(); i++) {
    RefPtr<PageInformation>& page = registeredPages[i];
    if (page->InnerWindowID() == aRegisteredInnerWindowID) {
      page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
      MOZ_RELEASE_ASSERT(
          sInstance->mDeadProfiledPages.append(std::move(page)));
      // Fix: the original read `erase(®isteredPages[i--])` — the `&` was
      // corrupted into `®` (mojibake), which does not compile. `i--`
      // compensates for the element shift caused by erase().
      registeredPages.erase(&registeredPages[i--]);
    }
  }
}
// Drop dead pages whose buffered data has scrolled out of the buffer range.
static void DiscardExpiredPages(PSLockRef) {
  MOZ_ASSERT(sInstance);
  uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
  // Discard any dead pages that were unregistered before
  // bufferRangeStart.
  sInstance->mDeadProfiledPages.eraseIf(
      [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
        Maybe<uint64_t> bufferPosition =
            aProfiledPage->BufferPositionWhenUnregistered();
        MOZ_RELEASE_ASSERT(bufferPosition,
                           "should have unregistered this page");
        return *bufferPosition < bufferRangeStart;
      });
}
// Drop all dead (unregistered) pages, regardless of buffer position.
static void ClearUnregisteredPages(PSLockRef) {
  MOZ_ASSERT(sInstance);
  sInstance->mDeadProfiledPages.clear();
}
// Discard the base profile and any exit profiles whose data is older than
// the buffer's current range start (their entries have been overwritten).
static void ClearExpiredExitProfiles(PSLockRef) {
  MOZ_ASSERT(sInstance);
  uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
  // Discard exit profiles that were gathered before our buffer RangeStart.
  // If we have started to overwrite our data from when the Base profile was
  // added, we should get rid of that Base profile because it's now older than
  // our oldest Gecko profile data.
  //
  // When adding: (In practice the starting buffer should be empty)
  // v Start == End
  // |          <-- Buffer range, initially empty.
  // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
  //
  // Later, still in range:
  // v Start      v End
  // |=========|  <-- Buffer range growing.
  // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
  //
  // Even later, now out of range:
  //   v Start         v End
  //   |============|  <-- Buffer range full and sliding.
  // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
  if (sInstance->mBaseProfileThreads &&
      sInstance->mGeckoIndexWhenBaseProfileAdded
              .ConvertToProfileBufferIndex() <
          profiler_get_core_buffer().GetState().mRangeStart) {
    DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
              sInstance->mBaseProfileThreads.get());
    sInstance->mBaseProfileThreads.reset();
  }
  sInstance->mExitProfiles.eraseIf(
      [bufferRangeStart](const ExitProfile& aExitProfile) {
        return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
      });
}
// Store the base-process profile JSON (thread data) and remember the core
// buffer position at which it was added, so it can be expired later by
// ClearExpiredExitProfiles().
static void AddBaseProfileThreads(PSLockRef aLock,
                                  UniquePtr<char[]> aBaseProfileThreads) {
  MOZ_ASSERT(sInstance);
  DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
  sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
  sInstance->mGeckoIndexWhenBaseProfileAdded =
      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
          profiler_get_core_buffer().GetState().mRangeEnd);
}
// Hand over ownership of the (non-expired) base profile, leaving none
// stored; returns null if it was never set or already expired/consumed.
static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
  MOZ_ASSERT(sInstance);
  // First drop it if it has already scrolled out of the buffer range.
  ClearExpiredExitProfiles(aLock);
  DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
            sInstance->mBaseProfileThreads.get());
  return std::move(sInstance->mBaseProfileThreads);
}
static void AddExitProfile(PSLockRef aLock, const nsACString& aExitProfile) {
--> --------------------
--> maximum size reached
--> --------------------