/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler. // // - A "periodic" sample is the most complex kind. It is done in response to a // timer while the profiler is active. It involves writing a stack trace plus // a variety of other values (memory measurements, responsiveness // measurements, markers, etc.) into the main ProfileBuffer. The sampling is // done from off-thread, and so SuspendAndSampleAndResumeThread() is used to // get the register values. // // - A "synchronous" sample is a simpler kind. It is done in response to an API // call (profiler_get_backtrace()). It involves writing a stack trace and // little else into a temporary ProfileBuffer, and wrapping that up in a // ProfilerBacktrace that can be subsequently used in a marker. The sampling // is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the // register values. // // - A "backtrace" sample is the simplest kind. It is done in response to an // API call (profiler_suspend_and_sample_thread()). It involves getting a // stack trace via a ProfilerStackCollector; it does not write to a // ProfileBuffer. The sampling is done from off-thread, and so uses // SuspendAndSampleAndResumeThread() to get the register values.
// To simplify other code in this file, define a helper definition to avoid // repeating the same preprocessor checks.
// The signals that we use to control the profiler conflict with the signals // used to control the code coverage tool. Therefore, if coverage is enabled, // we need to disable our own signal handling mechanisms. #ifndef MOZ_CODE_COVERAGE # ifdef XP_WIN // TODO: Add support for windows "signal"-like behaviour. See Bug 1867328. # elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \ defined(GP_OS_android) || defined(GP_OS_freebsd) // Specify the specific platforms that we want to support # define GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL 1 # else // No support on this unknown platform! # endif #endif
// We need some extra includes if we're supporting async posix signals #ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) # include <signal.h> # include <fcntl.h> # include <unistd.h> # include <errno.h> # include <pthread.h> #endif
#ifdefined(GP_OS_android) # include "JavaExceptions.h" # include "mozilla/java/GeckoJavaSamplerNatives.h" # include "mozilla/jni/Refs.h" #endif
#ifdefined(XP_MACOSX) # include "nsCocoaFeatures.h" #endif
#ifdefined(GP_PLAT_amd64_darwin) # include <cpuid.h> #endif
#ifdefined(GP_OS_windows) # include <processthreadsapi.h>
// GetThreadInformation is not available on Windows 7.
WINBASEAPI BOOL WINAPI GetThreadInformation(
_In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass,
_Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation,
_In_ DWORD ThreadInformationSize);
#endif
// Win32 builds always have frame pointers, so FramePointerStackWalk() always // works. #ifdefined(GP_PLAT_x86_windows) # define HAVE_NATIVE_UNWIND # define USE_FRAME_POINTER_STACK_WALK #endif
// Win64 builds always omit frame pointers, so we use the slower // MozStackWalk(), which works in that case. #ifdefined(GP_PLAT_amd64_windows) # define HAVE_NATIVE_UNWIND # define USE_MOZ_STACK_WALK #endif
// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower // MozStackWalk(). #ifdefined(GP_PLAT_arm64_windows) # define HAVE_NATIVE_UNWIND # define USE_MOZ_STACK_WALK #endif
// Mac builds use FramePointerStackWalk(). Even if we build without // frame pointers, we'll still get useful stacks in system libraries // because those always have frame pointers. // We don't use MozStackWalk() on Mac. #ifdefined(GP_OS_darwin) # define HAVE_NATIVE_UNWIND # define USE_FRAME_POINTER_STACK_WALK #endif
// Android builds use the ARM Exception Handling ABI to unwind. #ifdefined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) # define HAVE_NATIVE_UNWIND # define USE_EHABI_STACKWALK # include "EHABIStackWalk.h" #endif
// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks. #ifdefined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \ defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \ defined(GP_PLAT_arm64_freebsd) # define HAVE_NATIVE_UNWIND # define USE_LUL_STACKWALK # include "lul/LulMain.h" # include "lul/platform-linux-lul.h"
// On linux we use LUL for periodic samples and synchronous samples, but we use // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled. // (See the comment at the top of the file for a definition of // periodic/synchronous/backtrace.). // // FramePointerStackWalk can produce incomplete stacks when the current entry is // in a shared library without framepointers, however LUL can take a long time // to initialize, which is undesirable for consumers of // profiler_suspend_and_sample_thread like the Background Hang Reporter. # ifdefined(MOZ_PROFILING) # define USE_FRAME_POINTER_STACK_WALK # endif #endif
// We can only stackwalk without expensive initialization on platforms which // support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires // initializing LUL, and EHABIStackWalk requires initializing EHABI, both of // which can be expensive. #ifdefined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) # define HAVE_FASTINIT_NATIVE_UNWIND #endif
using mozilla::profiler::detail::RacyFeatures; using ThreadRegistration = mozilla::profiler::ThreadRegistration; using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo; using ThreadRegistry = mozilla::profiler::ThreadRegistry;
LazyLogModule gProfilerLog("prof");
ProfileChunkedBuffer& profiler_get_core_buffer() { // Defer to the Base Profiler in mozglue to create the core buffer if needed, // and keep a reference here, for quick access in xul. static ProfileChunkedBuffer& sProfileChunkedBuffer =
baseprofiler::profiler_get_core_buffer(); return sProfileChunkedBuffer;
}
#ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) // Control character to start the profiler ('g' for "go"!) staticconstchar sAsyncSignalControlCharStart = 'g'; // Control character to stop the profiler ('s' for "stop"!) staticconstchar sAsyncSignalControlCharStop = 's';
// This is a file descriptor that is the "write" end of the POSIX pipe that we // use to start the profiler. It is written to in profiler_start_signal_handler // and read from in AsyncSignalControlThread static mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed>
sAsyncSignalControlWriteFd(-1);
// Atomic flag to stop the profiler from within the sampling loop
mozilla::Atomic<bool, mozilla::MemoryOrdering::Relaxed> gStopAndDumpFromSignal( false); #endif
// Forward declare the function to call when we need to dump + stop from within // the async control thread void profiler_dump_and_stop(); // Forward declare the function to call when we need to start the profiler. void profiler_start_from_signal();
uint32_t features = 0;
features = ParseFeaturesFromStringArray(featureStringArray.begin(),
featureStringArray.length());
// 128 * 1024 * 1024 is the entries preset that is given in // devtools/client/performance-new/shared/background.sys.mjs
profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features,
filtersTemp.begin(), filtersTemp.length(), 0, Nothing());
}
// Add all the possible features.
PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration. #if !defined(GP_OS_android)
ProfilerFeature::ClearJava(features); #endif #if !defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features); #endif #ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) if (getenv("XPCOM_MEM_BLOAT_LOG")) {
DEBUG_LOG("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations."); // The memory hooks are available, but the bloat log is enabled, which is // not compatible with the native allocations tracking. See the comment in // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for // more information.
ProfilerFeature::ClearNativeAllocations(features);
} #else // The memory hooks are not available.
ProfilerFeature::ClearNativeAllocations(features); #endif #if !defined(MOZ_MEMORY) or !defined(MOZ_PROFILER_MEMORY)
ProfilerFeature::ClearMemory(features); #endif
// Default features common to all contexts (even if not available). static constexpr uint32_t DefaultFeatures() { return ProfilerFeature::Java | ProfilerFeature::JS |
ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU;
}
// Extra default features when MOZ_PROFILER_STARTUP is set (even if not // available). static constexpr uint32_t StartupExtraDefaultFeatures() { // Enable file I/Os by default for startup profiles as startup is heavy on // I/O operations. return ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages;
}
Json::String ToCompactString(const Json::Value& aJsonValue) {
Json::StreamWriterBuilder builder; // No indentations, and no newlines.
builder["indentation"] = ""; // This removes spaces after colons.
builder["enableYAMLCompatibility"] = false; // Only 6 digits after the decimal point; timestamps in ms have ns precision.
builder["precision"] = 6;
builder["precisionType"] = "decimal";
// RAII class to lock the profiler mutex. // It provides a mechanism to determine if it is locked or not in order for // memory hooks to avoid re-entering the profiler locked state. // Locking order: Profiler, ThreadRegistry, ThreadRegistration. class MOZ_RAII PSAutoLock { public:
PSAutoLock()
: mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& { // In DEBUG builds, *before* we attempt to lock gPSMutex, we want to // check that the ThreadRegistry, ThreadRegistration, and ProfilingLog // mutexes are *not* locked on this thread, to avoid inversion // deadlocks.
MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread());
MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread()); return gPSMutex;
}()) {}
// ASYNC POSIX SIGNAL HANDLING SUPPORT // // Integrating POSIX signals // (https://man7.org/linux/man-pages/man7/signal.7.html) into a complex // multi-threaded application such as Firefox can be a tricky proposition. // Signals are delivered by the operating system to a program, which then // invokes a signal handler // (https://man7.org/linux/man-pages/man2/sigaction.2.html) outside the normal // flow of control. This handler is responsible for performing operations in // response to the signal. If there is no "custom" handler defined, then default // behaviour is triggered, which usually results in a terminated program. // // As signal handlers interrupt the normal flow of control, Firefox may not be // in a safe state while the handler is running (e.g. it may be halfway through // a garbage collection cycle, or a critical lock may be held by the current // thread). This is something we must be aware of while writing one, and we are // additionally limited in terms of which POSIX functions we can call to those // which are async signal safe // (https://man7.org/linux/man-pages/man7/signal-safety.7.html). // // In the context of Firefox, this presents a number of details that we must be // aware of: // // * We are very limited by what we can call when we handle a signal: Many // functions in Firefox, and in the profiler specifically, allocate memory // when called. Allocating memory is specifically **not** async-signal-safe, // and so any functions that allocate should not be called from a signal // handler. // // * We need to be careful with how we communicate to other threads in the // process. The signal handler runs asynchronously, interrupting the current // thread of execution. Communication should therefore use atomics or other // concurrency constructs to ensure that data is read and written correctly. // We should avoid taking locks, as we may easily deadlock while within the // signal handler. // // * We cannot use the usual Firefox mechanisms for triggering behaviour in // other threads. For instance, tools such as ``NS_DispatchToMainThread`` // allocate memory when called, which is not allowed within a signal handler. // // We solve these constraints by introducing a new thread within the Firefox // profiler, the AsyncSignalControlThread which is responsible for carrying out // the actions triggered by a signal handler. We communicate between handlers // and this thread with the use of a libc pipe // (https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html#tag_16_685_08). // Writing to a pipe is async-signal-safe, so we can do so from a signal // handler, and we can set the pipe to be "blocking", meaning that when our // control thread tries to read it will block at the OS level (consuming no CPU) // until the handler writes to it. This is in contrast to (e.g.) an atomic // variable, where our thread would have to "busy wait" for it to be set. // // We have one "control" thread per process, and use a single byte for messages // we send. Writes to pipes are atomic if the size is less than or equal to // ``PIPE_BUF``, which (although implementation defined) in our case is always // one, thus trivially atomic. // // The control flow for a typical Firefox session in which a user starts and // stops profiling using POSIX signals therefore looks something like the // following: // // * Profiler initialization. // // * The main thread of each process starts the signal control thread, and // initialises signal handlers for ``SIGUSR1`` and ``SIGSUR2``. // * The signal control thread sets up pipes for communication, and begins // reading, blocking itself. // // * *After some time...* // * The user sends ``SIGUSR1`` to Firefox, e.g. using ``kill -s USR1 <firefox // pid>`` // // * The profiler_start_signal_handler signal handler for ``SIGUSR1`` is // triggered by the operating system. This writes the "start" control // character to the communication pipe and returns. // * The signal control thread wakes up, as there is now data on the pipe. // * The control thread recognises the "start" character, and starts the // profiler with a set of default presets. // * The control thread loops, and goes back to waiting on the pipe. // // * *The user uses Firefox, or waits for it to do something...* // * The user sends ``SIGUSR2`` to Firefox, e.g. using ``kill -s USR1 <firefox // pid>`` // // * The profiler_stop_signal_handler signal handler for ``SIGUSR2`` is // triggered by the operating system. This writes the "stop" control // character to the communication pipe and returns. // * The signal control thread wakes up, as there is now data on the pipe. // * The control thread recognises the "stop" character, and calls // profiler_stop_signal_handler to dump the profile to disk. // * The control thread loops, and goes back to waiting on the pipe. // // * *The user can now start another profiling session...* //
// Forward declare this, so we can call it from the constructor. staticvoid* AsyncSignalControlThreadEntry(void* aArg);
// Define our platform specific async (posix) signal control thread here. class AsyncSignalControlThread { public:
AsyncSignalControlThread() : mThread() { // Try to open a pipe for this to communicate with. If we can't do this, // then we give up and return, as there's no point continuing without // being able to communicate int pipeFds[2]; if (pipe(pipeFds)) {
LOG("Profiler AsyncSignalControlThread failed to create a pipe."); return;
}
// Close this pipe on calls to exec().
fcntl(pipeFds[0], F_SETFD, FD_CLOEXEC);
fcntl(pipeFds[1], F_SETFD, FD_CLOEXEC);
// Write the reading side to mFd, and the writing side to the global atomic
mFd = pipeFds[0];
sAsyncSignalControlWriteFd = pipeFds[1];
// We don't really care about stack size, as it should be minimal, so // leave the pthread attributes as a nullptr, i.e. choose the default.
pthread_attr_t* attr_ptr = nullptr; if (pthread_create(&mThread, attr_ptr, AsyncSignalControlThreadEntry, this) != 0) {
MOZ_CRASH("pthread_create failed");
}
};
~AsyncSignalControlThread() { // Derived from code in nsDumpUtils.cpp. Comment reproduced here for // poisterity: Close sAsyncSignalControlWriteFd /after/ setting the fd to // -1. Otherwise we have the (admittedly far-fetched) race where we // // 1) close sAsyncSignalControlWriteFd // 2) open a new fd with the same number as sAsyncSignalControlWriteFd // had. // 3) receive a signal, then write to the fd. int asyncSignalControlWriteFd = sAsyncSignalControlWriteFd.exchange(-1); // This will unblock the "read" in StartWatching.
close(asyncSignalControlWriteFd); // Finally, exit the thread.
pthread_join(mThread, nullptr);
};
void Watch() { char msg[1];
ssize_t nread; while (true) { // Try reading from the pipe. This will block until something is written:
nread = read(mFd, msg, sizeof(msg));
if (nread == -1 && errno == EINTR) { // nread == -1 and errno == EINTR means that `read` was interrupted // by a signal before reading any data. This is likely because the // profiling thread interrupted us (with SIGPROF). We can safely ignore // this and "go around" the loop again to try and read. continue;
}
if (nread == -1 && errno != EINTR) { // nread == -1 and errno != EINTR means that `read` has failed in some // way that we can't recover from. In this case, all we can do is give // up, and quit the watcher, as the pipe is likely broken.
LOG("Error (%d) when reading in AsyncSignalControlThread", errno); return;
}
if (nread == 0) { // nread == 0 signals that the other end of the pipe has been cleanly // closed. Close our end, and exit the reading loop.
close(mFd); return;
}
// If we reach here, nread != 0 and nread != -1. This means that we // should have read at least one byte, which should be a control byte // for the profiler. // It *might* happen that `read` is interrupted by the sampler thread // after successfully reading. If this occurs, read returns the number // of bytes read. As anything other than 1 is wrong for us, we can // always assume that we can read whatever `read` read.
MOZ_RELEASE_ASSERT(nread == 1);
if (msg[0] == sAsyncSignalControlCharStart) { // Check to see if the profiler is already running. This is done within // `profiler_start` anyway, but if we check sooner we avoid running all // the other code between now and that check. if (!profiler_is_active()) {
profiler_start_from_signal();
}
} elseif (msg[0] == sAsyncSignalControlCharStop) { // Check to see whether the profiler is even running before trying to // stop the profiler. Most other methods of stopping the profiler (i.e. // those through nsProfiler etc) already know whether or not the // profiler is running, so don't try and stop it if it's already // running. Signal-stopping doesn't have this constraint, so we should // check just in case there is a codepath followed by // `profiler_dump_and_stop` that breaks if we stop while stopped. if (profiler_is_active()) {
profiler_dump_and_stop();
}
} else {
LOG("AsyncSignalControlThread recieved unknown control signal: %c",
msg[0]);
}
}
};
private: // The read side of the pipe that we use to communicate from a signal handler // to the AsyncSignalControlThread int mFd;
// The thread handle for the async signal control thread // Note, that unlike the sampler thread, this is currently a posix-only // feature. Therefore, we don't bother to have a windows equivalent - we // just use a pthread_t
pthread_t mThread;
};
// All functions in this file can run on multiple threads unless they have an // NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is // valid even when the profiler is not active. Most profile operations can't do // anything useful when this class is not instantiated, so we release-assert // its non-nullness in all such operations. // // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a // PSAutoLock reference as an argument as proof that the gPSMutex is currently // locked. This makes it clear when gPSMutex is locked and helps avoid // accidental unlocked accesses to global state. There are ways to circumvent // this mechanism, but please don't do so without *very* good reason and a // detailed explanation. // // The exceptions to this rule: // // - mProcessStartTime, because it's immutable; class CorePS { private: #ifdef MOZ_PERFETTO class PerfettoObserver : public perfetto::TrackEventSessionObserver { public:
PerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(this); }
~PerfettoObserver() { perfetto::TrackEvent::RemoveSessionObserver(this); }
CorePS()
: mProcessStartTime(TimeStamp::ProcessCreation()),
mMaybeBandwidthCounter(nullptr) #ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
,
mAsyncSignalControlThread(nullptr) #endif #ifdef USE_LUL_STACKWALK
,
mLul(nullptr) #endif
{
MOZ_ASSERT(NS_IsMainThread(), "CorePS must be created from the main thread");
}
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex // being locked. This is because CorePS is instantiated so early on the main // thread that we don't have to worry about it being racy. staticbool Exists() { return !!sInstance; }
for (auto& registeredPage : sInstance->mRegisteredPages) {
aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
}
// Measurement of the following things may be added later if DMD finds it // is worthwhile: // - CorePS::mRegisteredPages itself (its elements' children are // measured above)
auto foundPageIter = std::find_if(
sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
RegisteredPageComparator{aRegisteredPage.get()});
if (foundPageIter != sInstance->mRegisteredPages.end()) { if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) { // When a BrowsingContext is loaded, the first url loaded in it will be // about:blank, and if the principal matches, the first document loaded // in it will share an inner window. That's why we should delete the // intermittent about:blank if they share the inner window.
sInstance->mRegisteredPages.erase(foundPageIter);
} else { // Do not register the same page again. return;
}
}
staticvoid AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
MOZ_ASSERT(sInstance); // we don't own the counter; they may be stored in static objects
MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
}
staticvoid RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { // we may be called to remove a counter after the profiler is stopped or // late in shutdown. if (sInstance) { auto* counter = std::find(sInstance->mCounters.begin(),
sInstance->mCounters.end(), aCounter);
MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
sInstance->mCounters.erase(counter);
}
}
private: // The singleton instance static CorePS* sInstance;
// The time that the process started. const TimeStamp mProcessStartTime;
// Network bandwidth counter for the Bandwidth feature.
ProfilerBandwidthCounter* mMaybeBandwidthCounter;
// Info on all the registered pages. // InnerWindowIDs in mRegisteredPages are unique.
Vector<RefPtr<PageInformation>> mRegisteredPages;
// Non-owning pointers to all active counters
Vector<BaseProfilerCount*> mCounters;
#ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) // Background thread for communicating with async signal handlers
AsyncSignalControlThread* mAsyncSignalControlThread; #endif
#ifdef USE_LUL_STACKWALK // LUL's state. Null prior to the first activation, non-null thereafter. // Owned by this CorePS.
mozilla::Atomic<lul::LUL*> mLul; #endif
// Process name, provided by child process initialization code.
nsAutoCString mProcessName; // Private name, provided by child process initialization code (eTLD+1 in // fission)
nsAutoCString mETLDplus1;
// This memory buffer is used by the MergeStacks mechanism. Previously it was // stack allocated, but this led to a stack overflow, as it was too much // memory. Here the buffer can be pre-allocated, and shared with the // MergeStacks feature as needed. MergeStacks is only run while holding the // lock, so it is safe to have only one instance allocated for all of the // threads.
JsFrameBuffer mJsFrames;
// Cached download directory for when we need to dump profiles to disk. #if !defined(XP_WIN)
Maybe<nsCOMPtr<nsIFile>> mAsyncSignalDumpDirectory; #endif
};
void locked_profiler_remove_sampled_counter(PSLockRef aLock,
BaseProfilerCount* aCounter) { // Note: we don't enforce a final sample, though we could do so if the // profiler was active
CorePS::RemoveCounter(aLock, aCounter);
}
// This class contains the profiler's global state that is valid only when the // profiler is active. When not instantiated, the profiler is inactive. // // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as // CorePS. // class ActivePS { private:
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
scBytesPerEntry / scMinimumNumberOfChunks,
size_t(scMaximumChunkSize)));
}
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { // Filter out any features unavailable in this platform/configuration.
aFeatures &= AvailableFeatures();
// Some features imply others. if (aFeatures & ProfilerFeature::FileIOAll) {
aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
} elseif (aFeatures & ProfilerFeature::FileIO) {
aFeatures |= ProfilerFeature::MainThreadIO;
}
if (aFeatures & ProfilerFeature::CPUAllThreads) {
aFeatures |= ProfilerFeature::CPUUtilization;
}
ActivePS(
PSLockRef aLock, const TimeStamp& aProfilingStartTime,
PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, constchar** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, const Maybe<double>& aDuration,
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull)
: mProfilingStartTime(aProfilingStartTime),
mGeneration(sNextGeneration++),
mCapacity(aCapacity),
mDuration(aDuration),
mInterval(aInterval),
mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
mActiveTabID(aActiveTabID),
mProfileBufferChunkManager(
aChunkManagerOrNull
? std::move(aChunkManagerOrNull)
: MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
size_t(ClampToAllowedEntries(aCapacity.Value())) *
scBytesPerEntry,
ChunkSizeForEntries(aCapacity.Value()))),
mProfileBuffer([this]() -> ProfileChunkedBuffer& {
ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer();
coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager); return coreBuffer;
}()),
mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
? new ProcessCPUCounter(aLock)
: nullptr),
mMaybePowerCounters(nullptr),
mMaybeCPUFreq(nullptr), // The new sampler thread doesn't start sampling immediately because the // main loop within Run() is blocked until this function's caller // unlocks gPSMutex.
mSamplerThread(
NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
mIsPaused(false),
mIsSamplingPaused(false) {
ProfilingLog::Init();
// Deep copy and lower-case aFilters.
MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount)); for (uint32_t i = 0; i < aFilterCount; ++i) {
mFilters[i] = aFilters[i];
mFiltersLowered[i].reserve(mFilters[i].size());
std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
std::back_inserter(mFiltersLowered[i]), ::tolower);
}
#if !defined(RELEASE_OR_BETA) if (ShouldInterposeIOs()) { // We need to register the observer on the main thread, because we want // to observe IO that happens on the main thread. // IOInterposer needs to be initialized before calling // IOInterposer::Register or our observer will be silently dropped. if (NS_IsMainThread()) {
IOInterposer::Init();
IOInterposer::Register(IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::ActivePS", []() { // Note: This could theoretically happen after ActivePS gets // destroyed, but it's ok: // - The Observer always checks that the profiler is (still) // active before doing its work. // - The destruction should happen on the same thread as this // construction, so the un-registration will also be dispatched // and queued on the main thread, and run after this.
IOInterposer::Init();
IOInterposer::Register(
IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
}));
}
} #endif
if (ProfilerFeature::HasPower(aFeatures)) {
mMaybePowerCounters = new PowerCounters(); for (constauto& powerCounter : mMaybePowerCounters->GetCounters()) {
locked_profiler_add_sampled_counter(aLock, powerCounter.get());
}
}
if (ProfilerFeature::HasCPUFrequency(aFeatures)) {
mMaybeCPUFreq = new ProfilerCPUFreq();
}
}
~ActivePS() {
MOZ_ASSERT(
!mMaybeProcessCPUCounter, "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
MOZ_ASSERT(
!mMaybePowerCounters, "mMaybePowerCounters should have been deleted before ~ActivePS()");
MOZ_ASSERT(!mMaybeCPUFreq, "mMaybeCPUFreq should have been deleted before ~ActivePS()"); #ifdefined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY)
MOZ_ASSERT(!mMemoryCounter, "mMemoryCounter should have been deleted before ~ActivePS()"); #endif
#if !defined(RELEASE_OR_BETA) if (ShouldInterposeIOs()) { // We need to unregister the observer on the main thread, because that's // where we've registered it. if (NS_IsMainThread()) {
IOInterposer::Unregister(IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::~ActivePS", []() {
IOInterposer::Unregister(
IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
}));
}
} #endif if (mProfileBufferChunkManager) { // We still control the chunk manager, remove it from the core buffer.
profiler_get_core_buffer().ResetChunkManager();
}
ProfilingLog::Destroy();
}
bool ThreadSelected(constchar* aThreadName) { if (mFiltersLowered.empty()) { returntrue;
}
std::string name = aThreadName;
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
for (constauto& filter : mFiltersLowered) { if (filter == "*") { returntrue;
}
// Crude, non UTF-8 compatible, case insensitive substring search if (name.find(filter) != std::string::npos) { returntrue;
}
// If the filter is "pid:<my pid>", profile all threads. if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { returntrue;
}
}
ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter(); if (counter && counter->IsRegistered()) { // Because profiler_count_bandwidth_bytes does a racy // profiler_feature_active check to avoid taking the lock, // free'ing the memory of the counter would be crashy if the // socket thread attempts to increment the counter while we are // stopping the profiler. // Instead, we keep the counter in CorePS and only mark it as // unregistered so that the next attempt to count bytes // will re-register it.
locked_profiler_remove_sampled_counter(aLock, counter);
counter->MarkUnregistered();
}
auto samplerThread = sInstance->mSamplerThread; delete sInstance;
sInstance = nullptr;
n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it // is worthwhile: // - mLiveProfiledThreads (both the array itself, and the contents) // - mDeadProfiledThreads (both the array itself, and the contents) //
return n;
}
static ThreadProfilingFeatures ProfilingFeaturesForThread(
PSLockRef aLock, const ThreadRegistrationInfo& aInfo) {
MOZ_ASSERT(sInstance); if (sInstance->ThreadSelected(aInfo.Name())) { // This thread was selected by the user, record everything. return ThreadProfilingFeatures::Any;
}
ThreadProfilingFeatures features = ThreadProfilingFeatures::NotProfiled; if (ActivePS::FeatureCPUAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::CPUUtilization);
} if (ActivePS::FeatureSamplingAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::Sampling);
} if (ActivePS::FeatureMarkersAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::Markers);
} return features;
}
// Writes out the current active configuration of the profile. staticvoid WriteActiveConfiguration(
PSLockRef aLock, JSONWriter& aWriter, const Span<constchar>& aPropertyName = MakeStringSpan("")) { if (!sInstance) { if (!aPropertyName.empty()) {
aWriter.NullProperty(aPropertyName);
} else {
aWriter.NullElement();
} return;
};
if (!aPropertyName.empty()) {
aWriter.StartObjectProperty(aPropertyName);
} else {
aWriter.StartObjectElement();
}
PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES) #undef WRITE_ACTIVE_FEATURES
aWriter.EndArray();
}
{
aWriter.StartArrayProperty("threads"); for (constauto& filter : sInstance->mFilters) {
aWriter.StringElement(filter);
}
aWriter.EndArray();
}
{ // Now write all the simple values.
// The interval is also available on profile.meta.interval
aWriter.DoubleProperty("interval", sInstance->mInterval);
aWriter.IntProperty("capacity", sInstance->mCapacity.Value()); if (sInstance->mDuration) {
aWriter.DoubleProperty("duration", sInstance->mDuration.value());
} // Here, we are converting uint64_t to double. Tab IDs are // being created using `nsContentUtils::GenerateProcessSpecificId`, which // is specifically designed to only use 53 of the 64 bits to be lossless // when passed into and out of JS as a double.
aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
}
aWriter.EndObject();
}
// Not using PS_GET, because only the "Controlled" interface of // `mProfileBufferChunkManager` should be exposed here. static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
PSLockRef) {
MOZ_ASSERT(sInstance);
MOZ_ASSERT(sInstance->mProfileBufferChunkManager); return *sInstance->mProfileBufferChunkManager;
}
staticvoid FulfillChunkRequests(PSLockRef) {
MOZ_ASSERT(sInstance); if (sInstance->mProfileBufferChunkManager) {
sInstance->mProfileBufferChunkManager->FulfillChunkRequests();
}
}
struct ProfiledThreadListElement {
TimeStamp mRegisterTime;
JSContext* mJSContext; // Null for unregistered threads.
ProfiledThreadData* mProfiledThreadData;
}; using ProfiledThreadList = Vector<ProfiledThreadListElement>;
// Returns a ProfiledThreadList with all threads that should be included in a // profile, both for threads that are still registered, and for threads that // have been unregistered but still have data in the buffer. // The returned array is sorted by thread register time. // Do not hold on to the return value past LockedRegistry. static ProfiledThreadList ProfiledThreads(
ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ProfiledThreadList array;
MOZ_RELEASE_ASSERT(
array.initCapacity(sInstance->mLiveProfiledThreads.length() +
sInstance->mDeadProfiledThreads.length()));
for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) {
ProfiledThreadData* profiledThreadData =
offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData(
aLock); if (!profiledThreadData) { // This thread was not profiled, continue with the next one. continue;
}
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
profiledThreadData->Info().RegisterTime(),
lockedThreadData->GetJSContext(), profiledThreadData}));
}
for (auto& t : sInstance->mDeadProfiledThreads) {
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
t->Info().RegisterTime(), (JSContext*)nullptr, t.get()}));
}
std::sort(array.begin(), array.end(),
[](const ProfiledThreadListElement& a, const ProfiledThreadListElement& b) { return a.mRegisterTime < b.mRegisterTime;
}); return array;
}
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
Vector<RefPtr<PageInformation>> array; for (auto& d : CorePS::RegisteredPages(aLock)) {
MOZ_RELEASE_ASSERT(array.append(d));
} for (auto& d : sInstance->mDeadProfiledPages) {
MOZ_RELEASE_ASSERT(array.append(d));
} // We don't need to sort the pages like threads since we won't show them // as a list. return array;
}
// Find the right entry in the mLiveProfiledThreads array and remove the // element, moving the ProfiledThreadData object for the thread into the // mDeadProfiledThreads array. for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; if (thread.mProfiledThreadData == aProfiledThreadData) {
thread.mProfiledThreadData->NotifyUnregistered(
sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
std::move(thread.mProfiledThreadData)));
sInstance->mLiveProfiledThreads.erase(
&sInstance->mLiveProfiledThreads[i]); return;
}
}
}
// This is a counter to collect process CPU utilization during profiling. // It cannot be a raw `ProfilerCounter` because we need to manually add/remove // it while the profiler lock is already held. class ProcessCPUCounter final : public AtomicProfilerCount { public: explicit ProcessCPUCounter(PSLockRef aLock)
: AtomicProfilerCount("processCPU", &mCounter, nullptr, "CPU", "Process CPU utilization") { // Adding on construction, so it's ready before the sampler starts.
locked_profiler_add_sampled_counter(aLock, this); // Note: Removed from ActivePS::Destroy, because a lock is needed.
}
staticvoid ClearExpiredExitProfiles(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); // Discard exit profiles that were gathered before our buffer RangeStart. // If we have started to overwrite our data from when the Base profile was // added, we should get rid of that Base profile because it's now older than // our oldest Gecko profile data. // // When adding: (In practice the starting buffer should be empty) // v Start == End // | <-- Buffer range, initially empty. // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it // // Later, still in range: // v Start v End // |=========| <-- Buffer range growing. // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it // // Even later, now out of range: // v Start v End // |============| <-- Buffer range full and sliding. // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it if (sInstance->mBaseProfileThreads &&
sInstance->mGeckoIndexWhenBaseProfileAdded
.ConvertToProfileBufferIndex() <
profiler_get_core_buffer().GetState().mRangeStart) {
DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
sInstance->mBaseProfileThreads.get());
sInstance->mBaseProfileThreads.reset();
}
sInstance->mExitProfiles.eraseIf(
[bufferRangeStart](const ExitProfile& aExitProfile) { return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
});
}
private: // The singleton instance. static ActivePS* sInstance;
const TimeStamp mProfilingStartTime;
// We need to track activity generations. If we didn't we could have the // following scenario. // // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks // gPSMutex, deletes the SamplerThread (which does a join). // // - profiler_start() runs on a different thread, locks gPSMutex, // re-instantiates ActivePS, unlocks gPSMutex -- all before the join // completes. // // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, // and continues as if the start/stop pair didn't occur. Also // profiler_stop() is stuck, unable to finish. // // By checking ActivePS *and* the generation, we can avoid this scenario. // sNextGeneration is used to track the next generation number; it is static // because it must persist across different ActivePS instantiations. const uint32_t mGeneration; static uint32_t sNextGeneration;
// The maximum number of entries in mProfileBuffer. const PowerOfTwo32 mCapacity;
// The maximum duration of entries in mProfileBuffer, in seconds. const Maybe<double> mDuration;
// The interval between samples, measured in milliseconds. constdouble mInterval;
// The profile features that are enabled. const uint32_t mFeatures;
// Substrings of names of threads we want to profile.
Vector<std::string> mFilters;
Vector<std::string> mFiltersLowered;
// ID of the active browser screen's active tab. // It's being used to determine the profiled tab. It's "0" if we failed to // get the ID. const uint64_t mActiveTabID;
// The chunk manager used by `mProfileBuffer` below. // May become null if it gets transferred ouf of the Gecko Profiler.
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager;
// The buffer into which all samples are recorded.
ProfileBuffer mProfileBuffer;
// ProfiledThreadData objects for any threads that were profiled at any point // during this run of the profiler: // - mLiveProfiledThreads contains all threads that are still registered, and // - mDeadProfiledThreads contains all threads that have already been // unregistered but for which there is still data in the profile buffer.
Vector<LiveProfiledThreadData> mLiveProfiledThreads;
Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
// Info on all the dead pages. // Registered pages are being moved to this array after unregistration. // We are keeping them in case we need them in the profile data. // We are removing them when we ensure that we won't need them anymore.
Vector<RefPtr<PageInformation>> mDeadProfiledPages;
// Used to collect process CPU utilization values, if the feature is on.
ProcessCPUCounter* mMaybeProcessCPUCounter;
// Used to collect power use data, if the power feature is on.
PowerCounters* mMaybePowerCounters;
// Used to collect cpu frequency, if the CPU frequency feature is on.
ProfilerCPUFreq* mMaybeCPUFreq;
// The current sampler thread. This class is not responsible for destroying // the SamplerThread object; the Destroy() method returns it so the caller // can destroy it.
SamplerThread* const mSamplerThread;
// Is the profiler fully paused? bool mIsPaused;
// Is the profiler periodic sampling paused? bool mIsSamplingPaused;
#ifdef DEBUG // Check if a non-zero id is not already used. Bug forgive it in non-DEBUG // builds; in the worst case they may get removed too early. if (aUniqueIdentifier != 0) { for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
mIdentifiedProfilingStateChangeCallbacks) {
MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier);
}
} #endif// DEBUG
if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) &&
profiler_is_active()) {
aCallback(ProfilingState::AlreadyActive);
}
// Remove the callback with the given identifier. void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
MOZ_ASSERT(aUniqueIdentifier != 0); if (aUniqueIdentifier == 0) { // Forgive zero in non-DEBUG builds. return;
}
// The name of the main thread. staticconstchar* const kMainThreadName = "GeckoMain";
//////////////////////////////////////////////////////////////////////// // BEGIN sampling/unwinding code
// Additional registers that have to be saved when thread is paused. #ifdefined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \ defined(GP_ARCH_x86) # define UNWINDING_REGS_HAVE_ECX_EDX #elifdefined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) || \ defined(__x86_64__) # define UNWINDING_REGS_HAVE_R10_R12 #elifdefined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) # define UNWINDING_REGS_HAVE_LR_R7 #elifdefined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) || \ defined(__aarch64__) # define UNWINDING_REGS_HAVE_LR_R11 #endif
// The registers used for stack unwinding and a few other sampling purposes. // The ctor does nothing; users are responsible for filling in the fields. class Registers { public:
Registers()
: mPC{nullptr},
mSP{nullptr},
mFP{nullptr} #ifdefined(UNWINDING_REGS_HAVE_ECX_EDX)
,
mEcx{nullptr},
mEdx{nullptr} #elifdefined(UNWINDING_REGS_HAVE_R10_R12)
,
mR10{nullptr},
mR12{nullptr} #elifdefined(UNWINDING_REGS_HAVE_LR_R7)
,
mLR{nullptr},
mR7{nullptr} #elifdefined(UNWINDING_REGS_HAVE_LR_R11)
,
mLR{nullptr},
mR11{nullptr} #endif
{
}
void Clear() { memset(this, 0, sizeof(*this)); }
// These fields are filled in by // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples.
Address mPC; // Instruction pointer.
Address mSP; // Stack pointer.
Address mFP; // Frame pointer. #ifdefined(UNWINDING_REGS_HAVE_ECX_EDX)
Address mEcx; // Temp for return address.
Address mEdx; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_R10_R12)
Address mR10; // Temp for return address.
Address mR12; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_LR_R7)
Address mLR; // ARM link register, or temp for return address.
Address mR7; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_LR_R11)
Address mLR; // ARM link register, or temp for return address.
Address mR11; // Temp for frame pointer. #endif
#ifdefined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) // This contains all the registers, which means it duplicates the four fields // above. This is ok.
ucontext_t* mContext; // The context from the signal handler or below.
ucontext_t mContextSyncStorage; // Storage for sync stack unwinding. #endif
};
class StackWalkControl { public: struct ResumePoint { // If lost, the stack walker should resume at these values. void* resumeSp; // If null, stop the walker here, don't resume again. void* resumeBp; void* resumePc;
};
// Add a resume point. Note that adding anything past MaxResumePointCount() // would silently fail. In practice this means that stack walking may still // lose native frames. void AddResumePoint(ResumePoint&& aResumePoint) { // If SP is null, we expect BP and PC to also be null.
MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp);
MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc);
// If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to // be null even if SP is not null.)
MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp);
MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp);
// Find the next resume point that would be a caller of the function with the // given SP; i.e., the resume point with the closest resumeSp > aSp. const ResumePoint* GetResumePointCallingSp(void* aSp) const { const ResumePoint* callingResumePoint = nullptr; for (const ResumePoint& resumePoint : *this) { if (resumePoint.resumeSp && // This is a potential resume point.
resumePoint.resumeSp > aSp && // It is a caller of the given SP.
(!callingResumePoint || // This is the first candidate.
resumePoint.resumeSp < callingResumePoint->resumeSp) // Or better.
) {
callingResumePoint = &resumePoint;
}
} return callingResumePoint;
}
#else public: static constexpr bool scIsSupported = false; // Discarded constexpr-if statements are still checked during compilation, // these declarations are necessary for that, even if not actually used. void Clear();
size_t ResumePointCount(); static constexpr size_t MaxResumePointCount(); void AddResumePoint(ResumePoint&& aResumePoint); const ResumePoint* begin() const; const ResumePoint* end() const; const ResumePoint* GetResumePointCallingSp(void* aSp) const; #endif
};
// Make a copy of the JS stack into a JSFrame array, and return the number of // copied frames. // This copy is necessary since, like the native stack, the JS stack is iterated // youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks. static uint32_t ExtractJsFrames( bool aIsSynchronous, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, ProfilerStackCollector& aCollector,
JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
MOZ_ASSERT(aJsFrames, "ExtractJsFrames should only be called if there is a " "JsFrameBuffer to fill.");
uint32_t jsFramesCount = 0;
// Only walk jit stack if profiling frame iterator is turned on.
JSContext* context = aThreadData.GetJSContext(); if (context && JS::IsProfilingEnabledForContext(context)) {
AutoWalkJSStack autoWalkJSStack;
// Non-periodic sampling passes Nothing() as the buffer write position to // ProfilingFrameIterator to avoid incorrectly resetting the buffer // position of sampled JIT frames inside the JS engine.
Maybe<uint64_t> samplePosInBuffer; if (!aIsSynchronous) { // aCollector.SamplePositionInBuffer() will return Nothing() when // profiler_suspend_and_sample_thread is called from the background hang // reporter.
samplePosInBuffer = aCollector.SamplePositionInBuffer();
}
for (JS::ProfilingFrameIterator jsIter(context, registerState,
samplePosInBuffer);
!jsIter.done(); ++jsIter) { if (aIsSynchronous || jsIter.isWasm()) {
jsFramesCount +=
jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES); if (jsFramesCount == MAX_JS_FRAMES) { break;
}
} else {
Maybe<JS::ProfilingFrameIterator::Frame> frame =
jsIter.getPhysicalFrameWithoutLabel(); if (frame.isSome()) {
aJsFrames[jsFramesCount++] = std::move(frame).ref(); if (jsFramesCount == MAX_JS_FRAMES) { break;
}
}
}
if constexpr (StackWalkControl::scIsSupported) { if (aStackWalkControlIfSupported) {
jsIter.getCppEntryRegisters().apply(
[&](const JS::ProfilingFrameIterator::RegisterState&
aCppEntry) {
StackWalkControl::ResumePoint resumePoint;
resumePoint.resumeSp = aCppEntry.sp;
resumePoint.resumeBp = aCppEntry.fp;
resumePoint.resumePc = aCppEntry.pc;
aStackWalkControlIfSupported->AddResumePoint(
std::move(resumePoint));
});
}
} else {
MOZ_ASSERT(!aStackWalkControlIfSupported, "aStackWalkControlIfSupported should be null when " "!StackWalkControl::scIsSupported");
(void)aStackWalkControlIfSupported;
}
}
}
}
return jsFramesCount;
}
// Merges the profiling stack, native stack, and JS stack, outputting the // details to aCollector. staticvoid MergeStacks( bool aIsSynchronous, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const NativeStack& aNativeStack, ProfilerStackCollector& aCollector,
JsFrame* aJsFrames, uint32_t aJsFramesCount) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// While the profiling stack array is ordered oldest-to-youngest, the JS and // native arrays are ordered youngest-to-oldest. We must add frames to aInfo // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS // and native arrays backwards. Note: this means the terminating condition // jsIndex and nativeIndex is being < 0.
uint32_t profilingStackIndex = 0;
int32_t jsIndex = aJsFramesCount - 1;
int32_t nativeIndex = aNativeStack.mCount - 1;
// Iterate as long as there is at least one frame remaining. while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
nativeIndex >= 0) { // There are 1 to 3 frames available. Find and add the oldest.
uint8_t* profilingStackAddr = nullptr;
uint8_t* jsStackAddr = nullptr;
uint8_t* nativeStackAddr = nullptr;
uint8_t* jsActivationAddr = nullptr;
if (profilingStackIndex != profilingStackFrameCount) { const js::ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
if (profilingStackFrame.isLabelFrame() ||
profilingStackFrame.isSpMarkerFrame()) {
lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
}
// Skip any JS_OSR frames. Such frames are used when the JS interpreter // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). // To avoid both the profiling stack frame and jit frame being recorded // (and showing up twice), the interpreter marks the interpreter // profiling stack frame as JS_OSR to ensure that it doesn't get counted. if (profilingStackFrame.isOSRFrame()) {
profilingStackIndex++; continue;
}
if (nativeIndex >= 0) {
nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
}
// If there's a native stack frame which has the same SP as a profiling // stack frame, pretend we didn't see the native stack frame. Ditto for a // native stack frame which has the same SP as a JS stack frame. In effect // this means profiling stack frames or JS frames trump conflicting native // frames. if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
jsStackAddr == nativeStackAddr)) {
nativeStackAddr = nullptr;
nativeIndex--;
MOZ_ASSERT(profilingStackAddr || jsStackAddr);
}
// Check to see if profiling stack frame is top-most. if (profilingStackAddr > jsStackAddr &&
profilingStackAddr > nativeStackAddr) {
MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); const js::ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
// Sp marker frames are just annotations and should not be recorded in // the profile. if (!profilingStackFrame.isSpMarkerFrame()) { // The JIT only allows the top-most frame to have a nullptr pc.
MOZ_ASSERT_IF(
profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
!profilingStackFrame.pc(),
&profilingStackFrame ==
&profilingStack.frames[profilingStack.stackSize() - 1]); if (aIsSynchronous && profilingStackFrame.categoryPair() ==
JS::ProfilingCategoryPair::PROFILER) { // For stacks captured synchronously (ie. marker stacks), stop // walking the stack as soon as we enter the profiler category, // to avoid showing profiler internal code in marker stacks. return;
}
aCollector.CollectProfilingStackFrame(profilingStackFrame);
}
profilingStackIndex++; continue;
}
// Check to see if JS jit stack frame is top-most if (jsStackAddr > nativeStackAddr) {
MOZ_ASSERT(jsIndex >= 0); const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress; // Stringifying non-wasm JIT frames is delayed until streaming time. To // re-lookup the entry in the JitcodeGlobalTable, we need to store the // JIT code address (OptInfoAddr) in the circular buffer. // // Note that we cannot do this when we are sychronously sampling the // current thread; that is, when called from profiler_get_backtrace. The // captured backtrace is usually externally stored for an indeterminate // amount of time, such as in nsRefreshDriver. Problematically, the // stored backtrace may be alive across a GC during which the profiler // itself is disabled. In that case, the JS engine is free to discard its // JIT code. This means that if we inserted such OptInfoAddr entries into // the buffer, nsRefreshDriver would now be holding on to a backtrace // with stale JIT code return addresses. if (aIsSynchronous ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmIon ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmBaseline ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmOther) {
aCollector.CollectWasmFrame(jsFrame.profilingCategory(), jsFrame.label);
} elseif (jsFrame.kind ==
JS::ProfilingFrameIterator::Frame_BaselineInterpreter) { // Materialize a ProfilingStackFrame similar to the C++ Interpreter. We // also set the IS_BLINTERP_FRAME flag to differentiate though.
JSScript* script = jsFrame.interpreterScript;
jsbytecode* pc = jsFrame.interpreterPC();
js::ProfilingStackFrame stackFrame;
constexpr uint32_t ExtraFlags =
uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME);
stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret,
ExtraFlags>("", jsFrame.label, script, pc,
jsFrame.realmID);
aCollector.CollectProfilingStackFrame(stackFrame);
} else {
MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
}
jsIndex--; continue;
}
// If we reach here, there must be a native stack frame and it must be the // greatest frame. if (nativeStackAddr && // If the latest JS frame was JIT, this could be the native frame that // corresponds to it. In that case, skip the native frame, because // there's no need for the same frame to be present twice in the stack. // The JS frame can be considered the symbolicated version of the native // frame.
(!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) && // This might still be a JIT operation, check to make sure that is not // in range of the NEXT JavaScript's stacks' activation address.
(!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
MOZ_ASSERT(nativeIndex >= 0); void* addr = (void*)aNativeStack.mPCs[nativeIndex];
aCollector.CollectNativeLeafAddr(addr);
} if (nativeIndex >= 0) {
nativeIndex--;
}
}
// Update the JS context with the current profile sample buffer generation. // // Only do this for periodic samples. We don't want to do this for // synchronous samples, and we also don't want to do it for calls to // profiler_suspend_and_sample_thread() from the background hang reporter - // in that case, aCollector.BufferRangeStart() will return Nothing(). if (!aIsSynchronous) {
aCollector.BufferRangeStart().apply(
[&aThreadData](uint64_t aBufferRangeStart) {
JSContext* context = aThreadData.GetJSContext(); if (context) {
JS::SetJSContextProfilerSampleBufferRangeStart(context,
aBufferRangeStart);
}
});
}
}
#ifdefined(USE_FRAME_POINTER_STACK_WALK) staticvoid DoFramePointerBacktrace( const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// Make a local copy of the Registers, to allow modifications.
Registers regs = aRegs;
// Start with the current function. We use 0 as the frame number here because // the FramePointerStackWalk() call below will use 1..N. This is a bit weird // but it doesn't matter because StackWalkCallback() doesn't use the frame // number argument.
StackWalkCallback(/* frameNum */ 0, regs.mPC, regs.mSP, &aNativeStack);
if constexpr (!StackWalkControl::scIsSupported) { break;
} else { if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { // No room to add more frames. break;
} if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) { // No resume information. break;
} void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { // No progress after the previous resume point. break;
} const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); if (!resumePoint) { break;
} void* sp = resumePoint->resumeSp; if (!sp) { // Null SP in a resume point means we stop here. break;
} void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount; if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { break;
} // Prepare context to resume stack walking.
regs.mPC = (Address)pc;
regs.mSP = (Address)sp;
regs.mFP = (Address)resumePoint->resumeBp;
previousResumeSp = sp;
}
}
} #endif
#ifdefined(USE_MOZ_STACK_WALK) staticvoid DoMozStackWalkBacktrace( const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// Start with the current function. We use 0 as the frame number here because // the MozStackWalkThread() call below will use 1..N. This is a bit weird but // it doesn't matter because StackWalkCallback() doesn't use the frame number // argument.
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
// This is to check forward-progress after using a resume point. void* previousResumeSp = nullptr;
for (;;) {
MozStackWalkThread(StackWalkCallback,
uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
&aNativeStack, thread, context);
if constexpr (!StackWalkControl::scIsSupported) { break;
} else { if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { // No room to add more frames. break;
} if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) { // No resume information. break;
} void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { // No progress after the previous resume point. break;
} const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); if (!resumePoint) { break;
} void* sp = resumePoint->resumeSp; if (!sp) { // Null SP in a resume point means we stop here. break;
} void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount; if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { break;
} // Prepare context to resume stack walking.
memset(&context_buf, 0, sizeof(CONTEXT));
context_buf.ContextFlags = CONTEXT_FULL; # ifdefined(_M_AMD64)
context_buf.Rsp = (DWORD64)sp;
context_buf.Rbp = (DWORD64)resumePoint->resumeBp;
context_buf.Rip = (DWORD64)pc; # else
static_assert(!StackWalkControl::scIsSupported, "Mismatched support between StackWalkControl and " "DoMozStackWalkBacktrace"); # endif
previousResumeSp = sp;
}
}
} #endif
#ifdef USE_EHABI_STACKWALK staticvoid DoEHABIBacktrace( const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// See the comment at the callsite for why this function is necessary. # ifdefined(MOZ_HAVE_ASAN_IGNORE)
MOZ_ASAN_IGNORE staticvoid ASAN_memcpy(void* aDst, constvoid* aSrc,
size_t aLen) { // The obvious thing to do here is call memcpy(). However, although // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the // false positive still manifests! So we must implement memcpy() ourselves // within this function. char* dst = static_cast<char*>(aDst); constchar* src = static_cast<constchar*>(aSrc);
for (size_t i = 0; i < aLen; i++) {
dst[i] = src[i];
}
} # endif
staticvoid DoLULBacktrace( const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the // stack's registered top point. Do some basic validity checks too. This // assumes that the TaggedUWord holding the stack pointer value is valid, but // it should be, since it was constructed that way in the code just above.
// We could construct |stackImg| so that LUL reads directly from the stack in // question, rather than from a copy of it. That would reduce overhead and // space use a bit. However, it gives a problem with dynamic analysis tools // (ASan, TSan, Valgrind) which is that such tools will report invalid or // racing memory accesses, and such accesses will be reported deep inside LUL. // By taking a copy here, we can either sanitise the copy (for Valgrind) or // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have // to try and suppress errors inside LUL. // // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks // observed in some minutes of testing, whilst keeping the size of this // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in // practice are small, 4KB or less, and so the copy costs are insignificant // compared to other profiler overhead. // // |stackImg| is allocated on this (the sampling thread's) stack. That // implies that the frame for this function is at least N_STACK_BYTES large. // In general it would be considered unacceptable to have such a large frame // on a stack, but it only exists for the unwinder thread, and so is not // expected to be a problem. Allocating it on the heap is troublesome because // this function runs whilst the sampled thread is suspended, so any heap // allocation risks deadlock. Allocating it as a global variable is not // thread safe, which would be a problem if we ever allow multiple sampler // threads. Hence allocating it on the stack seems to be the least-worst // option.
lul::StackImage stackImg;
{ # ifdefined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ defined(GP_PLAT_amd64_freebsd)
uintptr_t rEDZONE_SIZE = 128;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ defined(GP_PLAT_arm64_freebsd)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_mips64_linux)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; # else # error "Unknown plat" # endif
uintptr_t end = reinterpret_cast<uintptr_t>(aThreadData.StackTop());
uintptr_t ws = sizeof(void*);
start &= ~(ws - 1);
end &= ~(ws - 1);
uintptr_t nToCopy = 0; if (start < end) {
nToCopy = end - start; if (nToCopy >= 1024u * 1024u) { // start is abnormally far from end, possibly due to some special code // that uses a separate stack elsewhere (e.g.: rr). In this case we just // give up on this sample.
nToCopy = 0;
} elseif (nToCopy > lul::N_STACK_BYTES) {
nToCopy = lul::N_STACK_BYTES;
}
}
MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
stackImg.mLen = nToCopy;
stackImg.mStartAvma = start; if (nToCopy > 0) { // If this is a vanilla memcpy(), ASAN makes the following complaint: // // ERROR: AddressSanitizer: stack-buffer-underflow ... // ... // HINT: this may be a false positive if your program uses some custom // stack unwind mechanism or swapcontext // // This code is very much a custom stack unwind mechanism! So we use an // alternative memcpy() implementation that is ignored by ASAN. # ifdefined(MOZ_HAVE_ASAN_IGNORE)
ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); # else
memcpy(&stackImg.mContents[0], (void*)start, nToCopy); # endif
(void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
}
}
// Update stats in the LUL stats object. Unfortunately this requires // three global memory operations.
lul->mStats.mContext += 1;
lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
lul->mStats.mFP += framePointerFramesAcquired;
}
#endif
#ifdef HAVE_NATIVE_UNWIND staticvoid DoNativeBacktrace( const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { // This method determines which stackwalker is used for periodic and // synchronous samples. (Backtrace samples are treated differently, see // profiler_suspend_and_sample_thread() for details). The only part of the // ordering that matters is that LUL must precede FRAME_POINTER, because on // Linux they can both be present. # ifdefined(USE_LUL_STACKWALK)
DoLULBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported); # elif defined(USE_EHABI_STACKWALK)
DoEHABIBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported); # elif defined(USE_FRAME_POINTER_STACK_WALK)
DoFramePointerBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported); # elif defined(USE_MOZ_STACK_WALK)
DoMozStackWalkBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported); # else # error "Invalid configuration" # endif
}
void DoNativeBacktraceDirect(constvoid* stackTop, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) { # ifdefined(MOZ_PROFILING) # ifdefined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) // StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); void* previousResumeSp = nullptr; for (;;) {
MozStackWalk(StackWalkCallback, stackTop,
uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
&aNativeStack); if constexpr (!StackWalkControl::scIsSupported) { break;
} else { if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { // No room to add more frames. break;
} if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) { // No resume information. break;
} void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { // No progress after the previous resume point. break;
} const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); if (!resumePoint) { break;
} void* sp = resumePoint->resumeSp; if (!sp) { // Null SP in a resume point means we stop here. break;
} void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount; if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { break;
}
previousResumeSp = sp;
}
} # else// defined(USE_FRAME_POINTER_STACK_WALK) || // defined(USE_MOZ_STACK_WALK)
MOZ_CRASH( "Cannot call DoNativeBacktraceDirect without either " "USE_FRAME_POINTER_STACK_WALK USE_MOZ_STACK_WALK"); # endif // defined(USE_FRAME_POINTER_STACK_WALK) || // defined(USE_MOZ_STACK_WALK) # else
aNativeStack.mCount = 0; # endif
} #endif
// Writes some components shared by periodic and synchronous profiles to // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample() // and DoPeriodicSample().) // // The grammar for entry sequences is in a comment above // ProfileBuffer::StreamSamplesToJSON. staticinlinevoid DoSharedSample( bool aIsSynchronous, uint32_t aFeatures, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos,
uint64_t aBufferRangeStart, ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) { // WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(!aBuffer.IsThreadSafe(), "Mutexes cannot be used inside this critical section");
// We can't walk the whole native stack, but we can record the top frame. if (aCaptureOptions == StackCaptureOptions::Full) {
aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
}
}
}
// Writes the components of a synchronous sample to the given ProfileBuffer. staticvoid DoSyncSample(
uint32_t aFeatures, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const TimeStamp& aNow, const Registers& aRegs, ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions) { // WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack, "DoSyncSample should not be called when no capture is needed");
if (!aThreadData.GetJSContext()) { // No JSContext, there is no JS frame buffer (and no need for it).
DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData, /* aJsFrames = */ nullptr, aRegs, samplePos,
bufferRangeStart, aBuffer, aCaptureOptions);
} else { // JSContext is present, we need to lock the thread data to access the JS // frame buffer.
ThreadRegistration::WithOnThreadRef([&](ThreadRegistration::OnThreadRef
aOnThreadRef) {
aOnThreadRef.WithConstLockedRWOnThread(
[&](const ThreadRegistration::LockedRWOnThread& aLockedThreadData) {
DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
aLockedThreadData.GetJsFrameBuffer(), aRegs,
samplePos, bufferRangeStart, aBuffer,
aCaptureOptions);
});
});
}
}
// Writes the components of a periodic sample to ActivePS's ProfileBuffer. // The ThreadId entry is already written in the main ProfileBuffer, its location // is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). staticinlinevoid DoPeriodicSample(
PSLockRef aLock, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
ProfileBuffer& aBuffer) { // WARNING: this function runs within the profiler's "critical section".
staticvoid StreamMarkerSchema(SpliceableJSONWriter& aWriter) { // Get an array view with all registered marker-type-specific functions.
base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList
markerTypeFunctionsArray; // List of streamed marker names, this is used to spot duplicates.
std::set<std::string> names; // Stream the display schema for each different one. (Duplications may come // from the same code potentially living in different libraries.) for (constauto& markerTypeFunctions : markerTypeFunctionsArray) { auto name = markerTypeFunctions.mMarkerTypeNameFunction(); // std::set.insert(T&&) returns a pair, its `second` is true if the element // was actually inserted (i.e., it was not there yet.) constbool didInsert =
names.insert(std::string(name.data(), name.size())).second; if (didInsert) {
markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name);
}
}
// Now stream the Rust marker schemas. Passing the names set as a void pointer // as well, so we can continue checking if the schemes are added already in // the Rust side.
profiler::ffi::gecko_profiler_stream_marker_schemas(
&aWriter, static_cast<void*>(&names));
}
// Some meta information that is better recorded before streaming the profile. // This is *not* intended to be cached, as some values could change between // profiling sessions. struct PreRecordedMetaInformation { bool mAsyncStacks;
// This struct should only live on the stack, so it's fine to use Auto // strings.
nsAutoCString mHttpPlatform;
nsAutoCString mHttpOscpu;
nsAutoCString mHttpMisc;
// This function should be called out of the profiler lock. // It gathers non-trivial data that doesn't require the profiler to stop, or for // which the request could theoretically deadlock if the profiler is locked. static PreRecordedMetaInformation PreRecordMetaInformation( bool aShutdown = false) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
PreRecordedMetaInformation info = {}; // Aggregate-init all fields.
if (!NS_IsMainThread()) { // Leave these properties out if we're not on the main thread. // At the moment, the only case in which this function is called on a // background thread is if we're in a content process and are going to // send this profile to the parent process. In that case, the parent // process profile's "meta" object already has the rest of the properties, // and the parent process profile is dumped on that process's main thread. return info;
}
#ifdefined(XP_MACOSX) // On Mac, the http "oscpu" is capped at 10.15, so we need to get the real // OS version directly. int major = 0; int minor = 0; int bugfix = 0;
nsCocoaFeatures::GetSystemVersion(major, minor, bugfix); if (major != 0) {
info.mHttpOscpu.AppendLiteral("macOS ");
info.mHttpOscpu.AppendInt(major);
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(minor);
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(bugfix);
} else #endif #ifdefined(GP_OS_windows) // On Windows, the http "oscpu" is capped at Windows 10, so we need to get // the real OS version directly.
OSVERSIONINFO ovi = {sizeof(OSVERSIONINFO)}; if (GetVersionEx(&ovi)) {
info.mHttpOscpu.AppendLiteral("Windows "); // The major version returned for Windows 11 is 10, but we can // identify it from the build number.
info.mHttpOscpu.AppendInt(
ovi.dwBuildNumber >= 22000 ? 11 : int32_t(ovi.dwMajorVersion));
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(int32_t(ovi.dwMinorVersion)); # ifdefined(_ARM64_)
info.mHttpOscpu.AppendLiteral(" Arm64"); # endif
info.mHttpOscpu.AppendLiteral("; build=");
info.mHttpOscpu.AppendInt(int32_t(ovi.dwBuildNumber));
} else #endif
{
Unused << http->GetOscpu(info.mHttpOscpu);
}
// Firefox version is capped to 109.0 in the http "misc" field due to some // webcompat issues (Bug 1805967). We need to put the real version instead.
info.mHttpMisc.AssignLiteral("rv:");
info.mHttpMisc.AppendLiteral(MOZILLA_UAVERSION);
}
ProcessInfo processInfo = {}; // Aggregate-init all fields to false/zeroes. if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) {
info.mProcessInfoCpuCount = processInfo.cpuCount;
info.mProcessInfoCpuCores = processInfo.cpuCores;
info.mProcessInfoCpuName = processInfo.cpuName;
}
return info;
}
// Implemented in platform-specific cpps, to add object properties describing // the units of CPU measurements in samples. staticvoid StreamMetaPlatformSampleUnits(PSLockRef aLock,
SpliceableJSONWriter& aWriter);
// The "startTime" field holds the number of milliseconds since midnight // January 1, 1970 GMT. This grotty code computes (Now - (Now - // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form. // Note: This is the only absolute time in the profile! All other timestamps // are relative to this startTime.
TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
aWriter.DoubleProperty( "startTime", static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));
if (!NS_IsMainThread()) { // Leave the rest of the properties out if we're not on the main thread. // At the moment, the only case in which this function is called on a // background thread is if we're in a content process and are going to // send this profile to the parent process. In that case, the parent // process profile's "meta" object already has the rest of the properties, // and the parent process profile is dumped on that process's main thread. return;
}
// We should avoid collecting extension metadata for profiler when there is no // observer service, since a ExtensionPolicyService could not be created then. if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
aWriter.StartObjectProperty("extensions");
{
{
JSONSchemaWriter schema(aWriter);
schema.WriteField("id");
schema.WriteField("name");
schema.WriteField("baseURL");
}
static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) { if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) { return JS::ProfilingCategoryPair::IDLE;
} if (aName.EqualsLiteral("java.lang.Object.wait()")) { return JS::ProfilingCategoryPair::JAVA_BLOCKED;
} if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) { return JS::ProfilingCategoryPair::JAVA_ANDROID;
} if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) { return JS::ProfilingCategoryPair::JAVA_MOZILLA;
} if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") ||
StartsWith(aName, "com.sun.")) { return JS::ProfilingCategoryPair::JAVA_LANGUAGE;
} if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) { return JS::ProfilingCategoryPair::JAVA_KOTLIN;
} if (StartsWith(aName, "androidx.")) { return JS::ProfilingCategoryPair::JAVA_ANDROIDX;
} return JS::ProfilingCategoryPair::OTHER;
}
// Marker type for Java markers without any details. struct JavaMarker { static constexpr Span<constchar> MarkerTypeName() { return MakeStringSpan("Java");
} staticvoid StreamJSONMarkerData(
baseprofiler::SpliceableJSONWriter& aWriter) {} static MarkerSchema MarkerTypeDisplay() { using MS = MarkerSchema;
MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
MS::Location::MarkerTable};
schema.SetAllLabels("{marker.name}"); return schema;
}
};
// Marker type for Java markers with a detail field. struct JavaMarkerWithDetails { static constexpr Span<constchar> MarkerTypeName() { return MakeStringSpan("JavaWithDetails");
} staticvoid StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, const ProfilerString8View& aText) { // This (currently) needs to be called "name" to be searchable on the // front-end.
aWriter.StringProperty("name", aText);
} static MarkerSchema MarkerTypeDisplay() { using MS = MarkerSchema;
MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
MS::Location::MarkerTable};
schema.SetTooltipLabel("{marker.name}");
schema.SetChartLabel("{marker.data.name}");
schema.SetTableLabel("{marker.name} - {marker.data.name}");
schema.AddKeyLabelFormatSearchable("name", "Details", MS::Format::String,
MS::Searchable::Searchable); return schema;
}
};
staticvoid CollectJavaThreadProfileData(
nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef>& javaThreads,
ProfileBuffer& aProfileBuffer) { // Retrieve metadata about the threads. constauto threadCount = java::GeckoJavaSampler::GetRegisteredThreadCount(); for (int i = 0; i < threadCount; i++) {
javaThreads.AppendElement(
java::GeckoJavaSampler::GetRegisteredThreadInfo(i));
}
// locked_profiler_start uses sample count is 1000 for Java thread. // This entry size is enough now, but we might have to estimate it // if we can customize it // Pass the samples int sampleId = 0; while (true) { constauto threadId = java::GeckoJavaSampler::GetThreadId(sampleId); double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId); if (threadId == 0 || sampleTime == 0.0) { break;
}
aProfileBuffer.AddThreadIdEntry(ProfilerThreadId::FromNumber(threadId));
aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime)); int frameId = 0; while (true) {
jni::String::LocalRef frameName =
java::GeckoJavaSampler::GetFrameName(sampleId, frameId++); if (!frameName) { break;
}
nsCString frameNameString = frameName->ToCString();
// Pass the markers now while (true) { // Gets the data from the Android UI thread only.
java::GeckoJavaSampler::Marker::LocalRef marker =
java::GeckoJavaSampler::PollNextMarker(); if (!marker) { // All markers are transferred. break;
}
// Get all the marker information from the Java thread using JNI. constauto threadId = ProfilerThreadId::FromNumber(marker->GetThreadId());
nsCString markerName = marker->GetMarkerName()->ToCString();
jni::String::LocalRef text = marker->GetMarkerText();
TimeStamp startTime =
CorePS::ProcessStartTime() +
TimeDuration::FromMilliseconds(marker->GetStartTime());
double endTimeMs = marker->GetEndTime(); // A marker can be either a duration with start and end, or a point in time // with only startTime. If endTime is 0, this means it's a point in time.
TimeStamp endTime = endTimeMs == 0
? startTime
: CorePS::ProcessStartTime() +
TimeDuration::FromMilliseconds(endTimeMs);
MarkerTiming timing = endTimeMs == 0
? MarkerTiming::InstantAt(startTime)
: MarkerTiming::Interval(startTime, endTime);
if (!text) { // This marker doesn't have a text.
AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
geckoprofiler::category::JAVA_ANDROID,
{MarkerThreadId(threadId), std::move(timing)},
JavaMarker{});
} else { // This marker has a text.
AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
geckoprofiler::category::JAVA_ANDROID,
{MarkerThreadId(threadId), std::move(timing)},
JavaMarkerWithDetails{}, text->ToCString());
}
}
} #endif
#ifdef DEBUG
PRIntervalTime slowWithSleeps = 0; if (!XRE_IsParentProcess()) { for (constauto& filter : ActivePS::Filters(aLock)) { if (filter == "test-debug-child-slow-json") {
LOG("test-debug-child-slow-json"); // There are 10 slow-downs below, each will sleep 250ms, for a total of // 2.5s, which should trigger the first progress request after 1s, and // the next progress which will have advanced further, so this profile // shouldn't get dropped.
slowWithSleeps = PR_MillisecondsToInterval(250);
} elseif (filter == "test-debug-child-very-slow-json") {
LOG("test-debug-child-very-slow-json"); // Wait for more than 2s without any progress, which should get this // profile discarded.
PR_Sleep(PR_SecondsToInterval(5));
}
}
} # define SLOW_DOWN_FOR_TESTING() \ if (slowWithSleeps != 0) { \
DEBUG_LOG("progress=%.0f%%, sleep...", \
aProgressLogger.GetGlobalProgress().ToDouble() * 100.0); \
PR_Sleep(slowWithSleeps); \
} #else// #ifdef DEBUG # define SLOW_DOWN_FOR_TESTING() /* No slow-downs */ #endif// #ifdef DEBUG #else
// If there is a set "Window length", discard older data.
Maybe<double> durationS = ActivePS::Duration(aLock); if (durationS.isSome()) { constdouble durationStartMs = collectionStartMs - *durationS * 1000;
buffer.DiscardSamplesBeforeTime(durationStartMs);
}
aProgressLogger.SetLocalProgress(2_pc, "Discarded old data");
if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
#ifdefined(GP_OS_android) // Java thread profile data should be collected before serializing the meta // object. This is because Java thread adds some markers with marker schema // objects. And these objects should be added before the serialization of the // `profile.meta.markerSchema` array, so these marker schema objects can also // be serialized properly. That's why java thread profile data needs to be // done before everything.
// We are allocating it chunk by chunk. So this will not allocate 64 MiB // at once. This size should be more than enough for java threads. // This buffer is being created for each process but Android has // relatively fewer processes compared to desktop, so it's okay here.
mozilla::ProfileBufferChunkManagerWithLocalLimit javaChunkManager(
64 * 1024 * 1024, 1024 * 1024);
ProfileChunkedBuffer javaBufferManager(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex, javaChunkManager);
ProfileBuffer javaBuffer(javaBufferManager);
if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Lists the samples for each thread profile
aWriter.StartArrayProperty("threads");
{
ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
aProgressLogger.SetLocalProgress(15_pc, "Discarded expired profiles");
if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Prepare the streaming context for each thread.
ProcessStreamingContext processStreamingContext(
threadCount, aWriter.SourceFailureLatch(), CorePS::ProcessStartTime(),
aSinceTime); for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
20_pc, threadCount, "Preparing thread streaming contexts...")) {
ActivePS::ProfiledThreadListElement& thread = threads[i];
MOZ_RELEASE_ASSERT(thread.mProfiledThreadData);
processStreamingContext.AddThreadStreamingContext(
*thread.mProfiledThreadData, buffer, thread.mJSContext, aService,
std::move(progressLogger)); if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
}
SLOW_DOWN_FOR_TESTING();
// Read the buffer once, and extract all samples and markers that the // context expects.
buffer.StreamSamplesAndMarkersToJSON(
processStreamingContext, aProgressLogger.CreateSubLoggerTo( "Processing samples and markers...", 80_pc, "Processed samples and markers"));
if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Stream each thread from the pre-filled context.
ThreadStreamingContext* const contextListBegin =
processStreamingContext.begin();
MOZ_ASSERT(uint32_t(processStreamingContext.end() - contextListBegin) ==
threadCount); for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
92_pc, threadCount, "Streaming threads...")) {
ThreadStreamingContext& threadStreamingContext = contextListBegin[i];
threadStreamingContext.FinalizeWriter();
threadStreamingContext.mProfiledThreadData.StreamJSON(
std::move(threadStreamingContext), aWriter,
CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock),
CorePS::ProcessStartTime(), aService, std::move(progressLogger)); if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
}
}
aProgressLogger.SetLocalProgress(92_pc, "Wrote samples and markers");
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(aLock)) { for (java::GeckoJavaSampler::ThreadInfo::LocalRef& threadInfo :
javaThreads) {
ProfiledThreadData threadData(ThreadRegistrationInfo{
threadInfo->GetName()->ToCString().BeginReading(),
ProfilerThreadId::FromNumber(threadInfo->GetId()), false,
CorePS::ProcessStartTime()});
// Record timestamps for the collection into the buffer, so that consumers // know why we didn't collect any samples for its duration. // We put these entries into the buffer after we've collected the profile, // so they'll be visible for the *next* profile collection (if they haven't // been overwritten due to buffer wraparound by then).
buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
// Keep this internal function non-static, so it may be used by tests.
ProfilerResult<ProfileGenerationAdditionalInformation>
do_profiler_stream_json_for_this_process(
SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
LOG("profiler_stream_json_for_this_process");
if (aWriter.Failed()) { return Err(ProfilerError::JsonGenerationFailed);
} return additionalInfo;
}
ProfilerResult<ProfileGenerationAdditionalInformation>
profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
MOZ_RELEASE_ASSERT(
!XRE_IsParentProcess() || NS_IsMainThread(), "In the parent process, profiles should only be generated from the main " "thread, otherwise they will be incomplete.");
printf( "\n" "Profiler environment variable usage:\n" "\n" " MOZ_PROFILER_HELP\n" " If set to any value, prints this message.\n" " Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n" "\n" " MOZ_LOG\n" " Enables logging. The levels of logging available are\n" " 'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n" "\n" " MOZ_PROFILER_STARTUP\n" " If set to any value other than '' or '0'/'N'/'n', starts the\n" " profiler immediately on start-up.\n" " Useful if you want profile code that runs very early.\n" "\n" " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n" " If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n" " process in the profiler's circular buffer when the profiler is first\n" " started.\n" " If unset, the platform default is used:\n" " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n" " (%u bytes per entry -> %u or %u total bytes per process)\n" " Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n" "\n" " MOZ_PROFILER_STARTUP_DURATION=<1..>\n" " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n" " entries in the the profiler's circular buffer when the profiler is\n" " first started, in seconds.\n" " If unset, the life time of the entries will only be restricted by\n" " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n" " additional time duration restriction will be applied.\n" "\n" " MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n" " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n" " measured in milliseconds, when the profiler is first started.\n" " If unset, the platform default is used.\n" "\n" " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n" " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" " the integer value of the features bitfield.\n" " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n" "\n" " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n" " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" " a comma-separated list of strings.\n" " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n" " If unset, the platform default is used.\n" "\n" " Features: (x=unavailable, D/d=default/unavailable,\n" " S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n", unsigned(scMinimumBufferEntries), unsigned(scMaximumBufferEntries), unsigned(PROFILER_DEFAULT_ENTRIES.Value()), unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()), unsigned(scBytesPerEntry), unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry), unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry),
PROFILER_MAX_INTERVAL);
printf( " - \"default\" (All above D+S defaults)\n" "\n" " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n" " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n" " comma-separated list of strings. A given thread will be sampled if\n" " any of the filters is a case-insensitive substring of the thread\n" " name. If unset, a default is used.\n" "\n" " MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n" " This variable is used to propagate the activeTabID of\n" " the profiler init params to subprocesses.\n" "\n" " MOZ_PROFILER_SHUTDOWN=<Filename>\n" " If set, the profiler saves a profile to the named file on shutdown.\n" " If the Filename contains \"%%p\", this will be replaced with the'\n" " process id of the parent process.\n" "\n" " MOZ_PROFILER_SYMBOLICATE\n" " If set, the profiler will pre-symbolicate profiles.\n" " *Note* This will add a significant pause when gathering data, and\n" " is intended mainly for local development.\n" "\n" " MOZ_PROFILER_LUL_TEST\n" " If set to any value, runs LUL unit tests at startup.\n" "\n" " This platform %s native unwinding.\n" "\n", #ifdefined(HAVE_NATIVE_UNWIND) "supports" #else "does not support" #endif
);
}
//////////////////////////////////////////////////////////////////////// // BEGIN Sampler
// Sampler performs setup and teardown of the state required to sample with the // profiler. Sampler may exist when ActivePS is not present. // // SuspendAndSampleAndResumeThread must only be called from a single thread, // and must not sample the thread it is being called from. A separate Sampler // instance must be used for each thread which wants to capture samples.
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING // // With the exception of SamplerThread, all Sampler objects must be Disable-d // before releasing the lock which was used to create them. This avoids races // on linux with the SIGPROF signal handler.
class Sampler { public: // Sets up the profiler such that it can begin sampling. explicit Sampler(PSLockRef aLock);
// Disable the sampler, restoring it to its previous state. This must be // called once, and only once, before the Sampler is destroyed. void Disable(PSLockRef aLock);
// This method suspends and resumes the samplee thread. It calls the passed-in // function-like object aProcessRegs (passing it a populated |const // Registers&| arg) while the samplee thread is suspended. Note that // the aProcessRegs function must be very careful not to do anything that // requires a lock, since we may have interrupted the thread at any point. // As an example, you can't call TimeStamp::Now() since on windows it // takes a lock on the performance counter. // // Func must be a function-like object of type `void()`. template <typename Func> void SuspendAndSampleAndResumeThread(
PSLockRef aLock, const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, const TimeStamp& aNow, const Func& aProcessRegs);
private: #ifdefined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) // Used to restore the SIGPROF handler when ours is removed. struct sigaction mOldSigprofHandler;
// This process' ID. Needed as an argument for tgkill in // SuspendAndSampleAndResumeThread.
ProfilerProcessId mMyPid;
// The sampler thread's ID. Used to assert that it is not sampling itself, // which would lead to deadlock.
ProfilerThreadId mSamplerTid;
public: // This is the one-and-only variable used to communicate between the sampler // thread and the samplee thread's signal handler. It's static because the // samplee thread's signal handler is static. staticstruct SigHandlerCoordinator* sSigHandlerCoordinator; #endif
};
// END Sampler ////////////////////////////////////////////////////////////////////////
// Platform-specific function that retrieves per-thread CPU measurements. static RunningTimes GetThreadRunningTimesDiff(
PSLockRef aLock,
ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData); // Platform-specific function that *may* discard CPU measurements since the // previous call to GetThreadRunningTimesDiff, if the way to suspend threads on // this platform may add running times to that thread. // No-op otherwise, if suspending a thread doesn't make it work. staticvoid DiscardSuspendedThreadRunningTimes(
PSLockRef aLock,
ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
// Platform-specific function that retrieves process CPU measurements. static RunningTimes GetProcessRunningTimesDiff(
PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated);
// Template function to be used by `GetThreadRunningTimesDiff()` (unless some // platform has a better way to achieve this). // It help perform CPU measurements and tie them to a timestamp, such that the // measurements and timestamp are very close together. // This is necessary, because the relative CPU usage is computed by dividing // consecutive CPU measurements by their timestamp difference; if there was an // unexpected big gap, it could skew this computation and produce impossible // spikes that would hide the rest of the data. See bug 1685938 for more info. // Note that this may call the measurement function more than once; it is // assumed to normally be fast. // This was verified experimentally, but there is currently no regression // testing for it; see follow-up bug 1687402. template <typename GetCPURunningTimesFunction>
RunningTimes GetRunningTimesWithTightTimestamp(
GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) { // Once per process, compute a threshold over which running times and their // timestamp is considered too far apart. staticconst TimeDuration scMaxRunningTimesReadDuration = [&]() { // Run the main CPU measurements + timestamp a number of times and capture // their durations.
constexpr int loops = 128;
TimeDuration durations[loops];
RunningTimes runningTimes;
TimeStamp before = TimeStamp::Now(); for (int i = 0; i < loops; ++i) {
AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
aGetCPURunningTimesFunction(runningTimes); const TimeStamp after = TimeStamp::Now();
durations[i] = after - before;
before = after;
} // Move median duration to the middle.
std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]); // Use median*8 as cut-off point. // Typical durations should be around a microsecond, the cut-off should then // be around 10 microseconds, well below the expected minimum inter-sample // interval (observed as a few milliseconds), so overall this should keep // cpu/interval spikes return durations[loops / 2] * 8;
}();
// Record CPU measurements between two timestamps.
RunningTimes runningTimes;
TimeStamp before = TimeStamp::Now();
aGetCPURunningTimesFunction(runningTimes);
TimeStamp after = TimeStamp::Now(); const TimeDuration duration = after - before;
// In most cases, the above should be quick enough. But if not (e.g., because // of an OS context switch), repeat once: if (MOZ_UNLIKELY(duration > scMaxRunningTimesReadDuration)) {
AUTO_PROFILER_STATS(GetRunningTimes_REDO);
RunningTimes runningTimes2;
aGetCPURunningTimesFunction(runningTimes2);
TimeStamp after2 = TimeStamp::Now(); const TimeDuration duration2 = after2 - after; if (duration2 < duration) { // We did it faster, use the new results. (But it could still be slower // than expected, see note below for why it's acceptable.) // This must stay *after* the CPU measurements.
runningTimes2.SetPostMeasurementTimeStamp(after2); return runningTimes2;
} // Otherwise use the initial results, they were slow, but faster than the // second attempt. // This means that something bad happened twice in a row on the same thread! // So trying more times would be unlikely to get much better, and would be // more expensive than the precision is worth. // At worst, it means that a spike of activity may be reported in the next // time slice. But in the end, the cumulative work is conserved, so it // should still be visible at about the correct time in the graph.
AUTO_PROFILER_STATS(GetRunningTimes_RedoWasWorse);
}
// This must stay *after* the CPU measurements.
runningTimes.SetPostMeasurementTimeStamp(after);
return runningTimes;
}
//////////////////////////////////////////////////////////////////////// // BEGIN SamplerThread
// The sampler thread controls sampling and runs whenever the profiler is // active. It periodically runs through all registered threads, finds those // that should be sampled, then pauses and samples them.
class SamplerThread { public: // Creates a sampler thread, but doesn't start it.
SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, double aIntervalMilliseconds, uint32_t aFeatures);
~SamplerThread();
// This runs on (is!) the sampler thread. void Run();
#ifdefined(GP_OS_windows) // This runs on (is!) the thread to spy on unregistered threads. void RunUnregisteredThreadSpy(); #endif
// This runs on the main thread. void Stop(PSLockRef aLock);
void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) { // We are under lock, so it's safe to just modify the list pointer. // Also this means the sampler has not started its run yet, so any callback // added now will be invoked at the end of the next loop; this guarantees // that the callback will be invoked after at least one full sampling loop.
mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
std::move(mPostSamplingCallbackList), std::move(aCallback));
}
private: void SpyOnUnregisteredThreads();
// Item containing a post-sampling callback, and a tail-list of more items. // Using a linked list means no need to move items when adding more, and // "stealing" the whole list is one pointer move. struct PostSamplingCallbackListItem {
UniquePtr<PostSamplingCallbackListItem> mPrev;
PostSamplingCallback mCallback;
staticvoid InvokePostSamplingCallbacks(
UniquePtr<PostSamplingCallbackListItem> aCallbacks,
SamplingState aSamplingState) { if (!aCallbacks) { return;
} // We want to drill down to the last element in this list, which is the // oldest one, so that we invoke them in FIFO order. // We don't expect many callbacks, so it's safe to recurse. Note that we're // moving-from the UniquePtr, so the tail will implicitly get destroyed.
InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState); // We are going to destroy this item, so we can safely move-from the // callback before calling it (in case it has an rvalue-ref-qualified call // operator).
std::move(aCallbacks->mCallback)(aSamplingState); // It may be tempting for a future maintainer to change aCallbacks into an // rvalue reference; this will remind them not to do that!
static_assert(
std::is_same_v<decltype(aCallbacks),
UniquePtr<PostSamplingCallbackListItem>>, "We need to capture the list by-value, to implicitly destroy it");
}
// This suspends the calling thread for the given number of microseconds. // Best effort timing. void SleepMicro(uint32_t aMicroseconds);
// The sampler used to suspend and sample threads.
Sampler mSampler;
// The activity generation, for detecting when the sampler thread must stop. const uint32_t mActivityGeneration;
// The interval between samples, measured in microseconds. constint mIntervalMicroseconds;
// The OS-specific handle for the sampler thread. #ifdefined(GP_OS_windows)
HANDLE mThread;
HANDLE mUnregisteredThreadSpyThread = nullptr; enumclass SpyingState {
NoSpying,
Spy_Initializing, // Spy is waiting for SamplerToSpy_Start or MainToSpy_Shutdown.
Spy_Waiting, // Sampler requests spy to start working. May be pre-empted by // MainToSpy_Shutdown.
SamplerToSpy_Start, // Spy is currently working, cannot be interrupted, only the spy is allowed // to change the state again.
Spy_Working, // Main control requests spy to shut down.
MainToSpy_Shutdown, // Spy notified main control that it's out of the loop, about to exit.
SpyToMain_ShuttingDown
};
SpyingState mSpyingState = SpyingState::NoSpying; // The sampler will increment this while the spy is working, then while the // spy is waiting the sampler will decrement it until <=0 before starting the // spy. This will ensure that the work doesn't take more than 50% of a CPU // core. int mDelaySpyStart = 0;
Monitor mSpyingStateMonitor MOZ_UNANNOTATED{ "SamplerThread::mSpyingStateMonitor"}; #elifdefined(GP_OS_darwin) || defined(GP_OS_linux) || \ defined(GP_OS_android) || defined(GP_OS_freebsd)
pthread_t mThread; #endif
// Post-sampling callbacks are kept in a simple linked list, which will be // stolen by the sampler thread at the end of its next run.
UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;
// Comparisons with just a thread id, for easy searching in an array. friendbooloperator==(const SpiedThread& aSpiedThread,
base::ProcessId aThreadId) { return aSpiedThread.mThreadId == aThreadId;
} friendbooloperator==(base::ProcessId aThreadId, const SpiedThread& aSpiedThread) { return aThreadId == aSpiedThread.mThreadId;
}
};
// Time at which mSpiedThreads was previously updated. Null before 1st update.
TimeStamp mLastSpying; // Unregistered threads that have been found, and are being spied on. using SpiedThreads = AutoTArray<SpiedThread, 128>;
SpiedThreads mSpiedThreads;
// This function is required because we need to create a SamplerThread within // ActivePS's constructor, but SamplerThread is defined after ActivePS. It // could probably be removed by moving some code around. static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval, uint32_t aFeatures) { returnnew SamplerThread(aLock, aGeneration, aInterval, aFeatures);
}
// This function is the sampler thread. This implementation is used for all // targets. void SamplerThread::Run() {
NS_SetCurrentThreadName("SamplerThread");
// Features won't change during this SamplerThread's lifetime, so we can read // them once and store them locally. const uint32_t features = []() -> uint32_t {
PSAutoLock lock; if (!ActivePS::Exists(lock)) { // If there is no active profiler, it doesn't matter what we return, // because this thread will exit before any feature is used. return 0;
} return ActivePS::Features(lock);
}();
// Not *no*-stack-sampling means we do want stack sampling. constbool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
// Use local ProfileBuffer and underlying buffer to capture the stack. // (This is to avoid touching the core buffer lock while a thread is // suspended, because that thread could be working with the core buffer as // well.
mozilla::ProfileBufferChunkManagerSingle localChunkManager(
ProfileBufferChunkManager::scExpectedMaximumStackSize);
ProfileChunkedBuffer localBuffer(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
ProfileBuffer localProfileBuffer(localBuffer);
// Will be kept between collections, to know what each collection does. auto previousState = localBuffer.GetState();
// This will be filled at every loop, to be used by the next loop to compute // the CPU utilization between samples.
RunningTimes processRunningTimes;
// This will be set inside the loop, from inside the lock scope, to capture // all callbacks added before that, but none after the lock is released.
UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks; // This will be set inside the loop, before invoking callbacks outside.
SamplingState samplingState{};
// This is the scheduled time at which each sampling loop should start. // It will determine the ideal next sampling start by adding the expected // interval, unless when sampling runs late -- See end of while() loop.
TimeStamp scheduledSampleStart = TimeStamp::Now();
#ifdefined(HAVE_CPU_FREQ_SUPPORT) // Used to collect CPU core frequencies, if the cpufreq feature is on.
Vector<uint32_t> CPUSpeeds;
if (XRE_IsParentProcess() && ProfilerFeature::HasCPUFrequency(features) &&
CPUSpeeds.resize(GetNumberOfProcessors())) {
{
PSAutoLock lock; if (ProfilerCPUFreq* cpuFreq = ActivePS::MaybeCPUFreq(lock); cpuFreq) {
cpuFreq->Sample(); for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
CPUSpeeds[i] = cpuFreq->GetCPUSpeedMHz(i);
}
}
}
TimeStamp now = TimeStamp::Now(); for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
nsAutoCString name;
name.AssignLiteral("CPU ");
name.AppendInt(i);
while (true) { const TimeStamp sampleStart = TimeStamp::Now();
// This scope is for |lock|. It ends before we sleep below.
{ // There should be no local callbacks left from a previous loop.
MOZ_ASSERT(!postSamplingCallbacks);
// Move all the post-sampling callbacks locally, so that new ones cannot // sneak in between the end of the lock scope and the invocation after it.
postSamplingCallbacks = TakePostSamplingCallbacks(lock);
if (!ActivePS::Exists(lock)) { // Exit the `while` loop, including the lock scope, before invoking // callbacks and returning.
samplingState = SamplingState::JustStopped; break;
}
// At this point profiler_stop() might have been called, and // profiler_start() might have been called on another thread. If this // happens the generation won't match. if (ActivePS::Generation(lock) != mActivityGeneration) {
samplingState = SamplingState::JustStopped; // Exit the `while` loop, including the lock scope, before invoking // callbacks and returning. break;
}
// Before sampling counters, update the process CPU counter if active. if (ActivePS::ProcessCPUCounter* processCPUCounter =
ActivePS::MaybeProcessCPUCounter(lock);
processCPUCounter) {
RunningTimes processRunningTimesDiff =
GetProcessRunningTimesDiff(lock, processRunningTimes);
Maybe<uint64_t> cpu = processRunningTimesDiff.GetJsonThreadCPUDelta(); if (cpu) {
processCPUCounter->Add(static_cast<int64_t>(*cpu));
}
}
#ifdefined(HAVE_CPU_FREQ_SUPPORT) if (XRE_IsParentProcess() && CPUSpeeds.length() > 0) { unsigned newSpeed[CPUSpeeds.length()]; if (ProfilerCPUFreq* cpuFreq = ActivePS::MaybeCPUFreq(lock);
cpuFreq) {
cpuFreq->Sample(); for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
newSpeed[i] = cpuFreq->GetCPUSpeedMHz(i);
}
}
TimeStamp now = TimeStamp::Now(); for (size_t i = 0; i < CPUSpeeds.length(); ++i) { if (newSpeed[i] == CPUSpeeds[i]) { continue;
}
if (PowerCounters* powerCounters = ActivePS::MaybePowerCounters(lock);
powerCounters) {
powerCounters->Sample();
}
// handle per-process generic counters double counterSampleStartDeltaMs =
(TimeStamp::Now() - CorePS::ProcessStartTime()).ToMilliseconds(); const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock); for (auto& counter : counters) { if (auto sample = counter->Sample(); sample.isSampleNew) { // create Buffer entries for each counter
buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
buffer.AddEntry(
ProfileBufferEntry::Time(counterSampleStartDeltaMs)); #ifdefined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY) if (ActivePS::IsMemoryCounter(counter, lock)) { // For the memory counter, substract the size of our buffer to // avoid giving the misleading impression that the memory use // keeps on growing when it's just the profiler session that's // using a larger buffer as it gets longer.
sample.count -= static_cast<int64_t>(
ActivePS::ControlledChunkManager(lock).TotalSize());
} #endif
buffer.AddEntry(ProfileBufferEntry::Count(sample.count)); if (sample.number) {
buffer.AddEntry(ProfileBufferEntry::Number(sample.number));
}
}
}
TimeStamp countersSampled = TimeStamp::Now();
if (stackSampling || cpuUtilization) {
samplingState = SamplingState::SamplingCompleted;
// Prevent threads from ending (or starting) and allow access to all // OffThreadRef's.
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
ThreadRegistration::UnlockedRWForLockedProfiler&
unlockedThreadData =
offThreadRef.UnlockedRWForLockedProfilerRef();
ProfiledThreadData* profiledThreadData =
unlockedThreadData.GetProfiledThreadData(lock); if (!profiledThreadData) { // This thread is not being profiled, continue with the next one. continue;
}
const ThreadProfilingFeatures whatToProfile =
unlockedThreadData.ProfilingFeatures(); constbool threadCPUUtilization =
cpuUtilization &&
DoFeaturesIntersect(whatToProfile,
ThreadProfilingFeatures::CPUUtilization); constbool threadStackSampling =
stackSampling &&
DoFeaturesIntersect(whatToProfile,
ThreadProfilingFeatures::Sampling); if (!threadCPUUtilization && !threadStackSampling) { // Nothing to profile on this thread, continue with the next one. continue;
}
const RunningTimes runningTimesDiff = [&]() { if (!threadCPUUtilization) { // If we don't need CPU measurements, we only need a timestamp. return RunningTimes(TimeStamp::Now());
} return GetThreadRunningTimesDiff(lock, unlockedThreadData);
}();
// If the thread is asleep and has been sampled before in the same // sleep episode, or otherwise(*) if there was zero CPU activity // since the previous sampling, find and copy the previous sample, // as that's cheaper than taking a new sample. // (*) Tech note: The asleep check is done first and always, because // it is more reliable, and knows if it's the first asleep // sample, which cannot be duplicated; if the test was the other // way around, it could find zero CPU and then short-circuit // that state-changing second-asleep-check operation, which // could result in an unneeded sample. // However we're using current running times (instead of copying the // old ones) because some work could have happened. if (threadStackSampling &&
(unlockedThreadData.CanDuplicateLastSampleDueToSleep() ||
runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0)))) { constbool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
threadId, threadSampleDeltaMs,
profiledThreadData->LastSample(), runningTimesDiff); if (dup_ok) { continue;
}
}
// Record the global profiler buffer's range start now, before // adding the first entry for this thread's sample. const uint64_t bufferRangeStart = buffer.BufferRangeStart();
// Add the thread ID now, so we know its position in the main // buffer, which is used by some JS data. // (DoPeriodicSample only knows about the temporary local buffer.) const uint64_t samplePos = buffer.AddThreadIdEntry(threadId);
profiledThreadData->LastSample() = Some(samplePos);
// Also add the time, so it's always there after the thread ID, as // expected by the parser. (Other stack data is optional.)
buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack(
threadSampleDeltaMs));
Maybe<double> unresponsiveDuration_ms;
// If we have RunningTimes data, store it before the CompactStack. // Note: It is not stored inside the CompactStack so that it doesn't // get incorrectly duplicated when the thread is sleeping. if (!runningTimesDiff.IsEmpty()) {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff);
}
if (threadStackSampling) {
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock
lockedThreadData = offThreadRef.GetLockedRWFromAnyThread(); // Suspend the thread and collect its stack data in the local // buffer.
mSampler.SuspendAndSampleAndResumeThread(
lock, lockedThreadData.DataCRef(), now,
[&](const Registers& aRegs, const TimeStamp& aNow) {
DoPeriodicSample(lock, lockedThreadData.DataCRef(), aRegs,
samplePos, bufferRangeStart,
localProfileBuffer);
// For "eventDelay", we want the input delay - but if // there are no events in the input queue (or even if there // are), we're interested in how long the delay *would* be // for an input event now, which would be the time to finish // the current event + the delay caused by any events // already in the input queue (plus any High priority // events). Events at lower priorities (in a // PrioritizedEventQueue) than Input count for input delay // only for the duration that they're running, since when // they finish, any queued input event would run. // // Unless we record the time state of all events and queue // states at all times, this is hard to precisely calculate, // but we can approximate it well in post-processing with // RunningEventDelay and RunningEventStart. // // RunningEventDelay is the time duration the event was // queued before starting execution. RunningEventStart is // the time the event started. (Note: since we care about // Input event delays on MainThread, for // PrioritizedEventQueues we return 0 for RunningEventDelay // if the currently running event has a lower priority than // Input (since Input events won't queue behind them). // // To directly measure this we would need to record the time // at which the newest event currently in each queue at time // X (the sample time) finishes running. This of course // would require looking into the future, or recording all // this state and then post-processing it later. If we were // to trace every event start and end we could do this, but // it would have significant overhead to do so (and buffer // usage). From a recording of RunningEventDelays and // RunningEventStarts we can infer the actual delay: // // clang-format off // Event queue: <tail> D : C : B : A <head> // Time inserted (ms): 40 : 20 : 10 : 0 // Run Time (ms): 30 : 100 : 40 : 30 // // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 // [A||||||||||||] // ----------[B|||||||||||||||||] // -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] // -----------------------------------------------------------------[D|||||||||...] // // Calculate the delay of a new event added at time t: (run every sample) // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; // delta = (now - last_sample_time); // last_sample_time = now; // for (t=effective_submission to now) { // delay[t] += delta; // } // // Can be reduced in overhead by: // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; // if (effective_submission != last_submission) { // delta = (now - last_submision); // // this loop should be made to match each sample point in the range // // intead of assuming 1ms sampling as this pseudocode does // for (t=last_submission to effective_submission-1) { // delay[t] += delta; // delta -= 1; // assumes 1ms; adjust as needed to match for() // } // last_submission = effective_submission; // } // // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final // hypothetical Submission Running @ result // event E // 0 Empty A 0 30 0 0 @0=10 30 // 10 B A 0 60 0 0 @0=20, @10=10 60 // 20 B A 0 150 0 0 @0=30, @10=20, @20=10 150 // 30 C B 20 140 10 30 @10=20, @20=10, @30=0 140 // 40 C B 20 160 @10=30, @20=20... 160 // 50 C B 20 150 150 // 60 C B 20 140 @10=50, @20=40... 140 // 70 D C 50 130 20 70 @20=50, @30=40... 130 // ... // 160 D C 50 40 @20=140, @30=130... 40 // 170 <empty> D 140 30 40 @40=140, @50=130... (rounding) 30 // 180 <empty> D 140 20 40 @40=150 20 // 190 <empty> D 140 10 40 @40=160 10 // 200 <empty> <empty> 0 0 NA 0 // // Function Delay(t) = the time between t and the time at which a hypothetical // event e would start executing, if e was enqueued at time t. // // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running // // instantly. // Delay(0) = 30 // The hypothetical event e got enqueued just after A got // // enqueued. It can start running at 30, when A is done. // Delay(5) = 25 // Delay(10) = 60 // Can start running at 70, after both A and B are done. // Delay(19) = 51 // Delay(20) = 150 // Can start running at 170, after A, B & C. // Delay(25) = 145 // Delay(30) = 170 // Can start running at 200, after A, B, C & D. // Delay(120) = 80 // Delay(200) = 0 // (assuming nothing was enqueued after D) // // For every event that gets enqueued, the Delay time will go up by the // event's running time at the time at which the event is enqueued. // The Delay function will be a sawtooth of the following shape: // // |\ |... // | \ | // |\ | \ | // | \ | \ | // |\ | \ | \ | // |\ | \| \| \ | // | \| \ | // _| \____| // // // A more complex example with a PrioritizedEventQueue: // // Event queue: <tail> D : C : B : A <head> // Time inserted (ms): 40 : 20 : 10 : 0 // Run Time (ms): 30 : 100 : 40 : 30 // Priority: Input: Norm: Norm: Norm // // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 // [A||||||||||||] // ----------[B|||||||||||||||||] // ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] // ---------------[D||||||||||||] // // // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final // hypothetical Submission Running @ result // event // 0 Empty A 0 30 0 0 @0=10 30 // 10 B A 0 20 0 0 @0=20, @10=10 20 // 20 B A 0 10 0 0 @0=30, @10=20, @20=10 10 // 30 C B 0 40 30 30 @30=10 40 // 40 C B 0 60 30 @40=10, @30=20 60 // 50 C B 0 50 30 @50=10, @40=20, @30=30 50 // 60 C B 0 40 30 @60=10, @50=20, @40=30, @30=40 40 // 70 C D 30 30 40 70 @60=20, @50=30, @40=40 30 // 80 C D 30 20 40 70 ...@50=40, @40=50 20 // 90 C D 30 10 40 70 ...@60=40, @50=50, @40=60 10 // 100 <empty> C 0 100 100 100 @100=10 100 // 110 <empty> C 0 90 100 100 @110=10, @100=20 90
// // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority. // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running // // instantly. // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got // // enqueued. It can start running at 30, when A is done. // Delay(5) = 25 // Delay(10) = 20 // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not. // // So e can start running at 30, when A is done. // Delay(30) = 40 // Can start running at 70, after B is done. // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority) // Delay(80) = 20 // Delay(100) = 100 // Wait for C to finish
// clang-format on // // Alternatively we could insert (recycled instead of // allocated/freed) input events at every sample period // (1ms...), and use them to back-calculate the delay. This // might also be somewhat expensive, and would require // guessing at the maximum delay, which would likely be in // the seconds, and so you'd need 1000's of pre-allocated // events per queue per thread - so there would be a memory // impact as well.
// Note: eventDelay is a different definition of // responsiveness than the 16ms event injection.
// Don't suppress 0's for now; that can be a future // optimization. We probably want one zero to be stored // before we start suppressing, which would be more // complex.
unresponsiveDuration_ms =
Some(currentEventDelay.ToMilliseconds() +
currentEventRunning.ToMilliseconds());
});
if (cpuUtilization) { // Suspending the thread for sampling could have added some // running time to it, discard any since the call to // GetThreadRunningTimesDiff above.
DiscardSuspendedThreadRunningTimes(lock, unlockedThreadData);
}
// If we got eventDelay data, store it before the CompactStack. // Note: It is not stored inside the CompactStack so that it // doesn't get incorrectly duplicated when the thread is sleeping. if (unresponsiveDuration_ms.isSome()) {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::UnresponsiveDurationMs,
*unresponsiveDuration_ms);
}
}
// There *must* be a CompactStack after a TimeBeforeCompactStack; // but note that other entries may have been concurrently inserted // between the TimeBeforeCompactStack above and now. If the captured // sample from `DoPeriodicSample` is complete, copy it into the // global buffer, otherwise add an empty one to satisfy the parser // that expects one. auto state = localBuffer.GetState(); if (NS_WARN_IF(state.mFailedPutBytes !=
previousState.mFailedPutBytes)) {
LOG("Stack sample too big for local storage, failed to store %u " "bytes", unsigned(state.mFailedPutBytes -
previousState.mFailedPutBytes)); // There *must* be a CompactStack after a TimeBeforeCompactStack, // even an empty one.
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack,
UniquePtr<ProfileChunkedBuffer>(nullptr));
} elseif (state.mRangeEnd - previousState.mRangeEnd >=
*profiler_get_core_buffer().BufferLength()) {
LOG("Stack sample too big for profiler storage, needed %u bytes", unsigned(state.mRangeEnd - previousState.mRangeEnd)); // There *must* be a CompactStack after a TimeBeforeCompactStack, // even an empty one.
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack,
UniquePtr<ProfileChunkedBuffer>(nullptr));
} else {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack, localBuffer);
}
// Clean up for the next run.
localBuffer.Clear();
previousState = localBuffer.GetState();
}
} else {
samplingState = SamplingState::NoStackSamplingCompleted;
}
#ifdefined(USE_LUL_STACKWALK) // The LUL unwind object accumulates frame statistics. Periodically we // should poke it to give it a chance to print those statistics. This // involves doing I/O (fprintf, __android_log_print, etc.) and so // can't safely be done from the critical section inside // SuspendAndSampleAndResumeThread, which is why it is done here.
lul::LUL* lul = CorePS::Lul(); if (lul) {
lul->MaybeShowStats();
} #endif
TimeStamp threadsSampled = TimeStamp::Now();
buffer.CollectOverheadStats(sampleStartDeltaMs,
lockAcquired - sampleStart,
expiredMarkersCleaned - lockAcquired,
countersSampled - expiredMarkersCleaned,
threadsSampled - countersSampled);
} else {
samplingState = SamplingState::SamplingPaused;
}
} // gPSMutex is not held after this point.
// Invoke end-of-sampling callbacks outside of the locked scope.
InvokePostSamplingCallbacks(std::move(postSamplingCallbacks),
samplingState);
ProfilerChild::ProcessPendingUpdate();
if (ProfilerFeature::HasUnregisteredThreads(features)) { #ifdefined(GP_OS_windows)
{
MonitorAutoLock spyingStateLock{mSpyingStateMonitor}; switch (mSpyingState) { case SpyingState::SamplerToSpy_Start: case SpyingState::Spy_Working: // If the spy is working (or about to work), record this loop // iteration to delay the next start.
++mDelaySpyStart; break; case SpyingState::Spy_Waiting: // The Spy is idle, waiting for instructions. Should we delay? if (--mDelaySpyStart <= 0) {
mDelaySpyStart = 0;
mSpyingState = SpyingState::SamplerToSpy_Start;
mSpyingStateMonitor.NotifyAll();
} break; default: // Otherwise the spy should be initializing or shutting down.
MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
mSpyingState == SpyingState::MainToSpy_Shutdown ||
mSpyingState == SpyingState::SpyToMain_ShuttingDown); break;
}
} #else // On non-Windows platforms, this is fast enough to run in this thread, // each sampling loop.
SpyOnUnregisteredThreads(); #endif
}
// We expect the next sampling loop to start `sampleInterval` after this // loop here was scheduled to start.
scheduledSampleStart += sampleInterval;
// Try to sleep until we reach that next scheduled time. const TimeStamp beforeSleep = TimeStamp::Now(); if (scheduledSampleStart >= beforeSleep) { // There is still time before the next scheduled sample time. const uint32_t sleepTimeUs = static_cast<uint32_t>(
(scheduledSampleStart - beforeSleep).ToMicroseconds()); if (sleepTimeUs >= minimumIntervalSleepUs) {
SleepMicro(sleepTimeUs);
} else { // If we're too close to that time, sleep the minimum amount of time. // Note that the next scheduled start is not shifted, so at the end of // the next loop, sleep may again be adjusted to get closer to schedule.
SleepMicro(minimumIntervalSleepUs);
}
} else { // This sampling loop ended after the next sampling should have started! // There is little point to try and keep up to schedule now, it would // require more work, while it's likely we're late because the system is // already busy. Try and restart a normal schedule from now.
scheduledSampleStart = beforeSleep + sampleInterval;
SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds()));
}
}
// End of `while` loop. We can only be here from a `break` inside the loop.
InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState);
}
namespace geckoprofiler::markers {
struct UnregisteredThreadLifetimeMarker { static constexpr Span<constchar> MarkerTypeName() { return MakeStringSpan("UnregisteredThreadLifetime");
} staticvoid StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
base::ProcessId aThreadId, const ProfilerString8View& aName, const ProfilerString8View& aEndEvent) {
aWriter.IntProperty("Thread Id", aThreadId);
aWriter.StringProperty("Thread Name", aName.Length() != 0
? aName.AsSpan()
: MakeStringSpan("~Unnamed~")); if (aEndEvent.Length() != 0) {
aWriter.StringProperty("End Event", aEndEvent);
}
} static MarkerSchema MarkerTypeDisplay() { using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer,
MS::Searchable::Searchable);
schema.AddKeyFormatSearchable("Thread Name", MS::Format::String,
MS::Searchable::Searchable);
schema.AddKeyFormat("End Event", MS::Format::String);
schema.AddStaticLabelValue( "Note", "Start and end are approximate, based on first and last appearances.");
schema.SetChartLabel( "{marker.data.Thread Name} (tid {marker.data.Thread Id})");
schema.SetTableLabel("{marker.name} lifetime"); return schema;
}
};
if (!procInfoOrError.IsResolve()) {
PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)), "Could not retrieve any process information"); return;
}
constauto& procInfoHashMap = procInfoOrError.ResolveValue(); // Expecting the requested (current) process information to be present in the // hashmap. constauto& procInfoPtr =
procInfoHashMap.readonlyThreadsafeLookup(currentProcessId); if (!procInfoPtr) {
PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)), "Could not retrieve information about this process"); return;
}
// Record the time spent so far, which is OS-bound...
PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)), "Work to discover threads");
// ... and record the time needed to process the data, which we can control.
AUTO_PROFILER_MARKER_TEXT( "Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread()), "Work to process discovered threads and record unregistered ones"_ns);
// mLastSpying timestamp should be null only at the beginning of a session, // when mSpiedThreads is still empty.
MOZ_ASSERT_IF(mLastSpying.IsNull(), mSpiedThreads.IsEmpty());
// Find threads that were spied on but are not present anymore. constauto threadsBegin = threads.begin(); constauto threadsEnd = threads.end(); for (size_t spiedThreadIndexPlus1 = mSpiedThreads.Length();
spiedThreadIndexPlus1 != 0; --spiedThreadIndexPlus1) { const SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndexPlus1 - 1]; if (std::find_if(threadsBegin, threadsEnd,
[spiedTid = spiedThread.mThreadId]( const mozilla::ThreadInfo& aThreadInfo) { return aThreadInfo.tid == spiedTid;
}) == threadsEnd) { // This spied thread is gone.
PROFILER_MARKER(
MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(), // Place the end between this update and the previous one.
MarkerTiming::IntervalEnd(previousSpying +
(mLastSpying - previousSpying) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
spiedThread.mName, "Thread disappeared");
// Don't spy on it anymore, assuming it won't come back.
mSpiedThreads.RemoveElementAt(spiedThreadIndexPlus1 - 1);
}
}
for (const mozilla::ThreadInfo& threadInfo : threads) { // Index of this encountered thread in mSpiedThreads, or NoIndex.
size_t spiedThreadIndex = mSpiedThreads.IndexOf(threadInfo.tid); if (IsThreadIdRegistered(ProfilerThreadId::FromNumber(threadInfo.tid))) { // This thread id is already officially registered. if (spiedThreadIndex != SpiedThreads::NoIndex) { // This now-registered thread was previously being spied.
SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
PROFILER_MARKER(
MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(), // Place the end between this update and the previous one. // TODO: Find the real time from the thread registration?
MarkerTiming::IntervalEnd(previousSpying +
(mLastSpying - previousSpying) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
spiedThread.mName, "Thread registered itself");
// Remove from mSpiedThreads, since it can be profiled normally.
mSpiedThreads.RemoveElement(threadInfo.tid);
}
} else { // This thread id is not registered. if (spiedThreadIndex == SpiedThreads::NoIndex) { // This unregistered thread has not been spied yet, store it now.
NS_ConvertUTF16toUTF8 name(threadInfo.name);
mSpiedThreads.EmplaceBack(threadInfo.tid, name, threadInfo.cpuTime);
PROFILER_MARKER(
MakeThreadInfoMarkerName(threadInfo.tid, name), PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(), // Place the start between this update and the previous one (or // the start of this search if it's the first one).
MarkerTiming::IntervalStart(
mLastSpying -
(mLastSpying - (previousSpying.IsNull()
? unregisteredThreadSearchStart
: previousSpying)) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, threadInfo.tid, name, /* aEndEvent */ "");
} else { // This unregistered thread was already being spied, record its work.
SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
int64_t diffCPUTimeNs =
int64_t(threadInfo.cpuTime) - int64_t(spiedThread.mCPUTimeNs);
spiedThread.mCPUTimeNs = threadInfo.cpuTime; if (diffCPUTimeNs != 0) {
PROFILER_MARKER(
MakeThreadInfoMarkerName(threadInfo.tid, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(),
MarkerTiming::Interval(previousSpying, mLastSpying)),
UnregisteredThreadCPUMarker, threadInfo.tid, diffCPUTimeNs,
previousSpying, mLastSpying);
}
}
}
}
PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)), "Work to discover and record unregistered threads");
}
// We #include these files directly because it means those files can use // declarations from this file trivially. These provide target-specific // implementations of all SamplerThread methods except Run(). #ifdefined(GP_OS_windows) # include "platform-win32.cpp" #elifdefined(GP_OS_darwin) # include "platform-macos.cpp" #elifdefined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) # include "platform-linux-android.cpp" #else # error "bad platform" #endif
// END SamplerThread ////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////// // BEGIN externally visible functions
if (CorePS::Exists()) {
CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
}
if (ActivePS::Exists(lock)) {
profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
}
}
MOZ_COLLECT_REPORT( "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize, "Memory used by the Gecko Profiler's global state (excluding memory used " "by LUL).");
#ifdefined(USE_LUL_STACKWALK)
MOZ_COLLECT_REPORT( "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize, "Memory used by LUL, a stack unwinder used by the Gecko Profiler."); #endif
printf("\nUnrecognized feature \"%s\".\n\n", aFeature); // Since we may have an old feature we don't implement anymore, don't exit.
PrintUsage(); return 0;
}
uint32_t ParseFeaturesFromStringArray(constchar** aFeatures,
uint32_t aFeatureCount, bool aIsStartup /* = false */) {
uint32_t features = 0; for (size_t i = 0; i < aFeatureCount; i++) {
features |= ParseFeature(aFeatures[i], aIsStartup);
} return features;
}
staticvoid NotifyObservers(constchar* aTopic,
nsISupports* aSubject = nullptr) { if (!NS_IsMainThread()) { // Dispatch a task to the main thread that notifies observers. // If NotifyObservers is called both on and off the main thread within a // short time, the order of the notifications can be different from the // order of the calls to NotifyObservers. // Getting the order 100% right isn't that important at the moment, because // these notifications are only observed in the parent process, where the // profiler_* functions are currently only called on the main thread.
nsCOMPtr<nsISupports> subject = aSubject;
NS_DispatchToMainThread(NS_NewRunnableFunction( "NotifyObservers", [=] { NotifyObservers(aTopic, subject); })); return;
}
if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
os->NotifyObservers(aSubject, aTopic, nullptr);
}
}
// Iterate over all characters in aStorage and split at commas, by // overwriting commas with the null char.
Vector<constchar*> array;
size_t currentElementStart = 0; for (size_t i = 0; i <= len; i++) { if (aStorage[i] == ',') {
aStorage[i] = '\0';
} if (aStorage[i] == '\0') { // Only add non-empty elements, otherwise ParseFeatures would later // complain about unrecognized features. if (currentElementStart != i) {
MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
}
currentElementStart = i + 1;
}
} return array;
}
#ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) staticvoid profiler_start_signal_handler(int signal, siginfo_t* info, void* context) { // Starting the profiler from a signal handler is a risky business: Both of // the main tasks that we would like to accomplish (allocating memory, and // starting a thread) are illegal within a UNIX signal handler. Conversely, // we cannot dispatch to the main thread, as this may be "stuck" (why else // would we be using a signal handler to start the profiler?). // Instead, we have a background thread running that watches a pipe for a // given "control" character. In this handler, we can simply write to that // pipe to get the background thread to start the profiler for us! // Note that `write` is async-signal safe (see signal-safety(7)): // https://www.man7.org/linux/man-pages/man7/signal-safety.7.html // This means that it's safe for us to call within a signal handler. if (sAsyncSignalControlWriteFd != -1) { char signalControlCharacter = sAsyncSignalControlCharStart;
Unused << write(sAsyncSignalControlWriteFd, &signalControlCharacter, sizeof(signalControlCharacter));
}
}
staticvoid profiler_stop_signal_handler(int signal, siginfo_t* info, void* context) { // Signal handlers are limited in what functions they can call, for more // details see: https://man7.org/linux/man-pages/man7/signal-safety.7.html // As we have a background thread already running for checking whether or // not we want to start the profiler, we can re-use the same machinery to // stop the profiler. We use the same mechanism of writing to a pipe/file // descriptor, but with a different control character. Note that `write` is // signal safe. if (sAsyncSignalControlWriteFd != -1) { char signalControlCharacter = sAsyncSignalControlCharStop;
Unused << write(sAsyncSignalControlWriteFd, &signalControlCharacter, sizeof(signalControlCharacter));
}
} #endif
// This may fail if we have previously had an issue finding the download // directory, or if the directory has moved since we cached the path. // This is non-ideal, but captured by Bug 1885000
Maybe<nsAutoCString> profiler_find_dump_path() { // Note, this is currently a posix-only implementation, as we currently have // issues with fetching the download directory on Windows. See Bug 1890154. #ifdefined(XP_WIN) return Nothing(); #else
Maybe<nsCOMPtr<nsIFile>> directory = Nothing();
nsAutoCString path;
{ // Acquire the lock so that we can get things from CorePS
PSAutoLock lock;
Maybe<nsCOMPtr<nsIFile>> downloadDir = Nothing();
downloadDir = CorePS::AsyncSignalDumpDirectory(lock);
// This needs to be done within the context of the lock, as otherwise // another thread might modify CorePS::mAsyncSignalDumpDirectory while we're // cloning the pointer. if (downloadDir) {
nsCOMPtr<nsIFile> d;
downloadDir.value()->Clone(getter_AddRefs(d));
directory = Some(d);
} else { return Nothing();
}
}
// Now, we can check to see if we have a directory, and use it to construct // the output file if (directory) { // Set up the name of our profile file
path.AppendPrintf("profile_%i_%i.json", XRE_GetProcessType(), getpid());
// Append it to the directory we found
nsresult rv = directory.value()->AppendNative(path); if (NS_FAILED(rv)) {
LOG("Failed to append path to profile file"); return Nothing();
}
// Write the result *back* to the original path
rv = directory.value()->GetNativePath(path); if (NS_FAILED(rv)) {
LOG("Failed to get native path for temp path"); return Nothing();
}
return Some(path);
}
return Nothing(); #endif
}
void profiler_start_from_signal() { // Do nothing unless we're the parent process, as we're sandboxed and can't // write any data that we gather anyway. if (XRE_IsParentProcess()) { // Start the profiler here directly, as we're on a background thread. // set of preferences, configuration of them is TODO, see Bug 1866007 // Enabling the JS feature leaks an 8-byte object during testing, but is too // useful to disable. See Bug 1904897, Bug 1699681, and browser.toml for // more details.
uint32_t features = ProfilerFeature::JS | ProfilerFeature::StackWalk |
ProfilerFeature::CPUUtilization; // as we often don't know what threads we'll care about, tell the // profiler to profile all threads. constchar* filters[] = {"*"}; if (MOZ_UNLIKELY(NS_IsMainThread())) { // We are on the main thread here, so `NotifyProfilerStarted` will // start the profiler in content/child processes.
profiler_start(PROFILER_DEFAULT_SIGHANDLE_ENTRIES,
PROFILER_DEFAULT_INTERVAL, features, filters,
std::size(filters), 0);
} else { // Directly start the profiler on this thread. We know we're not the main // thread here, so this will not start the profiler in child processes, // but we want to make sure that we do it here in case the main thread is // stuck.
profiler_start(PROFILER_DEFAULT_SIGHANDLE_ENTRIES,
PROFILER_DEFAULT_INTERVAL, features, filters,
std::size(filters), 0); // Now also try and start the profiler from the main thread, so that the // ParentProfiler will start child threads.
NS_DispatchToMainThread(
NS_NewRunnableFunction("StartProfilerInChildProcesses", [=] {
Unused << NotifyProfilerStarted(
PROFILER_DEFAULT_SIGHANDLE_ENTRIES, Nothing(),
PROFILER_DEFAULT_INTERVAL, features, const_cast<constchar**>(filters), std::size(filters), 0);
}));
}
}
}
void profiler_dump_and_stop() { // Do nothing unless we're the parent process, as we're sandboxed and can't // open a file handle anyway. if (!XRE_IsParentProcess()) { return;
}
// pause the profiler until we are done dumping
profiler_pause();
// Try to save the profile to a file auto path = profiler_find_dump_path();
// Exit quickly if we can't find the path, while stopping the profiler if (!path) {
LOG("Failed to find a valid dump path to write profile to disk");
profiler_stop(); return;
}
// Dump the profile of this process first, in case the multi-process // gathering is unsuccessful (e.g. due to a blocked main threaed).
profiler_save_profile_to_file(path.value().get());
// We are probably not the main thread, but check anyway, and dispatch // directly. if (NS_IsMainThread()) {
nsCOMPtr<nsIProfiler> nsProfiler(
do_GetService("@mozilla.org/tools/profiler;1"));
nsProfiler->DumpProfileToFileAsyncNoJs(path.value(), 0)
->Then(
GetMainThreadSerialEventTarget(), __func__,
[](void_t ok) {
LOG("Stopping profiler after dumping profile to disk");
profiler_stop();
},
[](nsresult aRv) {
LOG("Dumping to disk failed with error \"%s\", stopping " "profiler.",
GetStaticErrorName(aRv));
profiler_stop();
});
} else { // Dispatch a runnable, as nsProfiler classes are currently main-thread // only. We also stop the profiler within the runnable, as otherwise we // may find ourselves stopping the profiler before the runnable has // gathered all the profile data.
NS_DispatchToMainThread(
NS_NewRunnableFunction("WriteProfileDataToFile", [=] {
nsCOMPtr<nsIProfiler> nsProfiler(
do_GetService("@mozilla.org/tools/profiler;1"));
nsProfiler->DumpProfileToFileAsyncNoJs(path.value(), 0)
->Then(
GetMainThreadSerialEventTarget(), __func__,
[](void_t ok) {
LOG("Stopping profiler after dumping profile to disk");
profiler_stop();
},
[](nsresult aRv) {
LOG("Dumping to disk failed with error \"%s\", stopping " "profiler.",
GetStaticErrorName(aRv));
profiler_stop();
});
}));
}
}
#ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) void profiler_init_signal_handlers() { // Set a handler to start the profiler struct sigaction prof_start_sa{};
memset(&prof_start_sa, 0, sizeof(struct sigaction));
prof_start_sa.sa_sigaction = profiler_start_signal_handler;
prof_start_sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&prof_start_sa.sa_mask);
DebugOnly<int> rstart = sigaction(SIGUSR1, &prof_start_sa, nullptr);
MOZ_ASSERT(rstart == 0, "Failed to install Profiler SIGUSR1 handler");
// Set a handler to stop the profiler struct sigaction prof_stop_sa{};
memset(&prof_stop_sa, 0, sizeof(struct sigaction));
prof_stop_sa.sa_sigaction = profiler_stop_signal_handler;
prof_stop_sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&prof_stop_sa.sa_mask);
DebugOnly<int> rstop = sigaction(SIGUSR2, &prof_stop_sa, nullptr);
MOZ_ASSERT(rstop == 0, "Failed to install Profiler SIGUSR2 handler");
} #endif
staticvoid PollJSSamplingForCurrentThread() { // Don't call into the JS engine with the global profiler mutex held as this // can deadlock.
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
// We've passed the possible failure point. Instantiate CorePS, which // indicates that the profiler has initialized successfully.
CorePS::Create(lock);
// Make sure threads already in the ThreadRegistry (like the main thread) // get registered in CorePS as well.
{
ThreadRegistry::LockedRegistry lockedRegistry; for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
locked_register_thread(lock, offThreadRef);
}
} // Platform-specific initialization.
PlatformInit(lock);
#ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) // Initialise the background thread to listen for signal handler // communication
CorePS::SetAsyncSignalControlThread(new AsyncSignalControlThread);
// Initialise the signal handlers needed to start/stop the profiler
profiler_init_signal_handlers(); #endif
#ifdefined(GP_OS_android) if (jni::IsAvailable()) {
GeckoJavaSampler::Init();
} #endif
// (Linux-only) We could create CorePS::mLul and read unwind info into it // at this point. That would match the lifetime implied by destruction of // it in profiler_shutdown() just below. However, that gives a big delay on // startup, even if no profiling is actually to be done. So, instead, it is // created on demand at the first call to PlatformStart().
// `long` could be 32 or 64 bits, so we force a 64-bit comparison with // the maximum 32-bit signed number (as more than that is clamped down to // 2^31 anyway). if (errno == 0 && capacityLong > 0 && static_cast<uint64_t>(capacityLong) <= static_cast<uint64_t>(INT32_MAX)) {
capacity = PowerOfTwo32(
ClampToAllowedEntries(static_cast<uint32_t>(capacityLong)));
LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
} else {
LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
startupCapacity);
PrintUsage(); exit(1);
}
}
// We do this with gPSMutex unlocked. The comment in profiler_stop() explains // why.
Unused << NotifyProfilerStarted(capacity, duration, interval, features,
filters.begin(), filters.length(), 0);
}
if (profiler_is_active()) {
invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
}
invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);
// We collect information here to be used below so it can be done outside of // the lock. We only need it if MOZ_PROFILER_SHUTDOWN is set and not empty. constchar* filename = getenv("MOZ_PROFILER_SHUTDOWN");
PreRecordedMetaInformation preRecordedMetaInformation = {}; if (filename && filename[0] != '\0') {
preRecordedMetaInformation = PreRecordMetaInformation(/* aShutdown */ true);
}
ProfilerParent::ProfilerWillStopIfStarted();
// If the profiler is active we must get a handle to the SamplerThread before // ActivePS is destroyed, in order to delete it.
SamplerThread* samplerThread = nullptr;
{
PSAutoLock lock;
// Save the profile on shutdown if requested. if (ActivePS::Exists(lock)) { if (filename && filename[0] != '\0') {
locked_profiler_save_profile_to_file(lock, filename,
preRecordedMetaInformation, /* aIsShuttingDown */ true);
} if (aIsFastShutdown == IsFastShutdown::Yes) { return;
}
// We do these operations with gPSMutex unlocked. The comments in // profiler_stop() explain why. if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped"); delete samplerThread;
}
// Reverse the registration done in profiler_init.
ThreadRegistration::UnregisterThread();
}
// Don't include profiles from other processes because this is a // synchronous function.
aWriter.StartArrayProperty("processes");
aWriter.EndArray();
}
aWriter.End(); return !aWriter.Failed();
}
if (!ActivePS::Exists(lock)) {
aSetEnv("MOZ_PROFILER_STARTUP", ""); return;
}
aSetEnv("MOZ_PROFILER_STARTUP", "1");
// If MOZ_PROFILER_SHUTDOWN is defined, make sure it's empty in children, so // that they don't attempt to write over that file. if (getenv("MOZ_PROFILER_SHUTDOWN")) {
aSetEnv("MOZ_PROFILER_SHUTDOWN", "");
}
// Hidden option to stop Base Profiler, mostly due to Talos intermittents, // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3 // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325. if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1");
}
auto capacityString =
Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
// Use AppendFloat instead of Smprintf with %f because the decimal // separator used by %f is locale-dependent. But the string we produce needs // to be parseable by strtod, which only accepts the period character as a // decimal separator. AppendFloat always uses the period character.
nsCString intervalString;
intervalString.AppendFloat(ActivePS::Interval(lock));
aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get());
auto featuresString = Smprintf("%d", ActivePS::Features(lock));
aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
std::string filtersString; const Vector<std::string>& filters = ActivePS::Filters(lock); for (uint32_t i = 0; i < filters.length(); ++i) { if (i != 0) {
filtersString += ",";
}
filtersString += filters[i];
}
aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock));
aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get());
}
// When the profiler is started on a background thread, we can't synchronously // call PollJSSampling on the main thread's ThreadInfo. And the next regular // call to PollJSSampling on the main thread would only happen once the main // thread triggers a JS interrupt callback. // This means that all the JS execution between profiler_start() and the first // JS interrupt would happen with JS sampling disabled, and we wouldn't get any // JS function information for that period of time. // So in order to start JS sampling as soon as possible, we dispatch a runnable // to the main thread which manually calls PollJSSamplingForCurrentThread(). // In some cases this runnable will lose the race with the next JS interrupt. // That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls. staticvoid TriggerPollJSSamplingOnMainThread() {
nsCOMPtr<nsIThread> mainThread;
nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread)); if (NS_SUCCEEDED(rv) && mainThread) {
nsCOMPtr<nsIRunnable> task =
NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread",
[]() { PollJSSamplingForCurrentThread(); });
SchedulerGroup::Dispatch(task.forget());
}
}
// Do this before the Base Profiler is stopped, to keep the existing buffer // (if any) alive for our use. if (NS_IsMainThread()) {
mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker();
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("EnsureBufferForMainThreadAddMarker",
&mozilla::base_profiler_markers_detail::
EnsureBufferForMainThreadAddMarker));
}
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> baseChunkManager; bool profilersHandOver = false; if (baseprofiler::profiler_is_active()) { // Note that we still hold the lock, so the sampler cannot run yet and // interact negatively with the still-active BaseProfiler sampler. // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP.
// Take ownership of the chunk manager from the Base Profiler, to extend its // lifetime during the new Gecko Profiler session. Since we're using the // same core buffer, all the base profiler data remains.
baseChunkManager = baseprofiler::detail::ExtractBaseProfilerChunkManager();
if (baseChunkManager) {
profilersHandOver = true; if (const TimeStamp baseProfilingStartTime =
baseprofiler::detail::GetProfilingStartTime();
!baseProfilingStartTime.IsNull()) {
profilingStartTime = baseProfilingStartTime;
}
BASE_PROFILER_MARKER_TEXT( "Profilers handover", PROFILER, MarkerTiming::IntervalStart(), "Transition from Base to Gecko Profiler, some data may be missing");
}
// Now stop Base Profiler (BP), as further recording will be ignored anyway, // and so that it won't clash with Gecko Profiler (GP) sampling starting // after the lock is dropped. // On Linux this is especially important to do before creating the GP // sampler, because the BP sampler may send a signal (to stop threads to be // sampled), which the GP would intercept before its own initialization is // complete and ready to handle such signals. // Note that even though `profiler_stop()` doesn't immediately destroy and // join the sampler thread, it safely deactivates it in such a way that the // thread will soon exit without doing any actual work. // TODO: Allow non-sampling profiling to continue. // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown.
baseprofiler::profiler_stop();
}
// Fall back to the default values if the passed-in values are unreasonable. // We want to be able to store at least one full stack.
PowerOfTwo32 capacity =
(aCapacity.Value() >=
ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
? aCapacity
: PROFILER_DEFAULT_ENTRIES;
Maybe<double> duration = aDuration;
// ActivePS::Create can only succeed or crash.
MOZ_ASSERT(ActivePS::Exists(aLock));
// Set up profiling for each registered thread, if appropriate. #ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) bool isMainThreadBeingProfiled = false; #endif
ThreadRegistry::LockedRegistry lockedRegistry; for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { const ThreadRegistrationInfo& info =
offThreadRef.UnlockedConstReaderCRef().Info();
ThreadProfilingFeatures threadProfilingFeatures =
ActivePS::ProfilingFeaturesForThread(aLock, info); if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) {
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
aLock, MakeUnique<ProfiledThreadData>(info));
lockedThreadData->SetProfilingFeaturesAndData(threadProfilingFeatures,
profiledThreadData, aLock);
lockedThreadData->GetNewCpuTimeInNs(); if (ActivePS::FeatureJS(aLock)) {
lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock)); if (!lockedThreadData.GetLockedRWOnThread() && info.IsMainThread()) { // Dispatch a runnable to the main thread to call // PollJSSampling(), so that we don't have wait for the next JS // interrupt callback in order to start profiling JS.
TriggerPollJSSamplingOnMainThread();
}
} #ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) if (info.IsMainThread()) {
isMainThreadBeingProfiled = true;
} #endif
lockedThreadData->ReinitializeOnResume(); if (ActivePS::FeatureJS(aLock) && lockedThreadData->GetJSContext()) {
profiledThreadData->NotifyReceivedJSContext(0); if (ActivePS::FeatureTracing(aLock)) {
CycleCollectedJSContext* ctx =
lockedThreadData->GetCycleCollectedJSContext();
ctx->BeginExecutionTracingAsync();
}
}
}
}
// Setup support for pushing/popping labels in mozglue.
RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(aLock)) { int javaInterval = interval; // Java sampling doesn't accurately keep up with the sampling rate that is // lower than 1ms. if (javaInterval < 1) {
javaInterval = 1;
}
JNIEnv* env = jni::GetEnvForThread(); constauto& filters = ActivePS::Filters(aLock);
jni::ObjectArray::LocalRef javaFilters =
jni::ObjectArray::New<jni::String>(filters.length()); for (size_t i = 0; i < filters.length(); i++) {
javaFilters->SetElement(i, jni::StringParam(filters[i].data(), env));
}
// Send the interval-relative entry count, but we have 100000 hard cap in // the java code, it can't be more than that.
java::GeckoJavaSampler::Start(
javaFilters, javaInterval,
std::round((double)(capacity.Value()) * interval /
(double)(javaInterval)));
} #endif
#ifdefined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY) if (ActivePS::FeatureMemory(aLock)) { auto counter = mozilla::profiler::create_memory_counter();
locked_profiler_add_sampled_counter(aLock, counter.get());
ActivePS::SetMemoryCounter(std::move(counter), aLock);
} #endif
#ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) if (ActivePS::FeatureNativeAllocations(aLock)) { if (isMainThreadBeingProfiled) {
mozilla::profiler::enable_native_allocations();
} else {
NS_WARNING( "The nativeallocations feature is turned on, but the main thread is " "not being profiled. The allocations are only stored on the main " "thread.");
}
} #endif
if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) {
StartAudioCallbackTracing();
}
// At the very end, set up RacyFeatures.
RacyFeatures::SetActive(ActivePS::Features(aLock));
if (profilersHandOver) {
PROFILER_MARKER_UNTYPED("Profilers handover", PROFILER,
MarkerTiming::IntervalEnd());
}
}
// Initialize if necessary. if (!CorePS::Exists()) {
profiler_init(nullptr);
}
// Reset the current state if the profiler is running. if (ActivePS::Exists(lock)) { // Note: Not invoking callbacks with ProfilingState::Stopping, because // we're under lock, and also it would not be useful: Any profiling data // will be discarded, and we're immediately restarting the profiler below // and then notifying ProfilingState::Started.
samplerThread = locked_profiler_stop(lock);
}
// We do these operations with gPSMutex unlocked. The comments in // profiler_stop() explain why. if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped"); delete samplerThread;
} return NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
aFilters, aFilterCount, aActiveTabID);
}
// Initialize if necessary. if (!CorePS::Exists()) {
profiler_init(nullptr);
}
if (ActivePS::Exists(lock)) { // The profiler is active. if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
aFilters, aFilterCount, aActiveTabID)) { // Stop and restart with different settings. // Note: Not invoking callbacks with ProfilingState::Stopping, because // we're under lock, and also it would not be useful: Any profiling data // will be discarded, and we're immediately restarting the profiler // below and then notifying ProfilingState::Started.
samplerThread = locked_profiler_stop(lock);
locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
startedProfiler = true;
}
} else { // The profiler is stopped.
locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
startedProfiler = true;
}
}
PollJSSamplingForCurrentThread();
// We do these operations with gPSMutex unlocked. The comments in // profiler_stop() explain why. if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped"); delete samplerThread;
}
if (startedProfiler) {
invoke_profiler_state_change_callbacks(ProfilingState::Started);
if (ActivePS::FeatureJS(aLock)) { if (ActivePS::FeatureTracing(aLock)) {
CycleCollectedJSContext* ctx =
lockedThreadData->GetCycleCollectedJSContext(); if (ctx) {
ctx->EndExecutionTracingAsync();
}
}
lockedThreadData->StopJSSampling(); if (!lockedThreadData.GetLockedRWOnThread() &&
lockedThreadData->Info().IsMainThread()) { // Dispatch a runnable to the main thread to call PollJSSampling(), // so that we don't have wait for the next JS interrupt callback in // order to start profiling JS.
TriggerPollJSSamplingOnMainThread();
}
}
}
// The Stop() call doesn't actually stop Run(); that happens in this // function's caller when the sampler thread is destroyed. Stop() just gives // the SamplerThread a chance to do some cleanup with gPSMutex locked.
SamplerThread* samplerThread = ActivePS::Destroy(aLock);
samplerThread->Stop(aLock);
if (profiler_is_active()) {
invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
}
ProfilerParent::ProfilerWillStopIfStarted();
#ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) // Remove the hooks early, as native allocations (if they are on) can be // quite expensive.
mozilla::profiler::remove_memory_hooks(); #endif
SamplerThread* samplerThread;
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) { return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
samplerThread = locked_profiler_stop(lock);
}
PollJSSamplingForCurrentThread();
// We notify observers with gPSMutex unlocked. Otherwise we might get a // deadlock, if code run by these functions calls a profiler function that // locks gPSMutex, for example when it wants to insert a marker. // (This has been seen in practise in bug 1346356, when we were still firing // these notifications synchronously.)
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped");
// We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we // would be waiting here with gPSMutex locked for SamplerThread::Run() to // return so the join operation within the destructor can complete, but Run() // needs to lock gPSMutex to return. // // Because this call occurs with gPSMutex unlocked, it -- including the final // iteration of Run()'s loop -- must be able detect deactivation and return // in a way that's safe with respect to other gPSMutex-locking operations // that may have occurred in the meantime. delete samplerThread;
// See `ProfilerControl.h` for more details. void profiler_lookup_async_signal_dump_directory() { // This implementation is causing issues on Windows (see Bug 1890154) but as it // only exists to support the posix signal handling (on non-windows platforms) // we can remove it for now. #if !defined(XP_WIN)
LOG("profiler_lookup_async_signal_dump_directory");
MOZ_ASSERT(
NS_IsMainThread(), "We can only get access to the directory service from the main thread");
// Make sure the profiler is actually running~
MOZ_RELEASE_ASSERT(CorePS::Exists());
// take the lock so that we can write to CorePS
PSAutoLock lock;
nsresult rv;
// Check to see if we have a `MOZ_UPLOAD_DIR` first - i.e., check to see if // we're running in CI.
LOG("Checking if MOZ_UPLOAD_DIR exists"); constchar* mozUploadDir = getenv("MOZ_UPLOAD_DIR"); if (mozUploadDir && mozUploadDir[0] != '\0') {
LOG("Found MOZ_UPLOAD_DIR at: %s", mozUploadDir); // We want to do the right thing, and turn this into an nsIFile. Go through // the motions here:
nsCOMPtr<nsIFile> mozUploadDirFile;
rv = NS_NewNativeLocalFile(nsDependentCString(mozUploadDir),
getter_AddRefs(mozUploadDirFile)); if (NS_FAILED(rv)) {
LOG("Failed to assign a filepath while creating MOZ_UPLOAD_DIR file " "%s, Error %s ",
mozUploadDir, GetStaticErrorName(rv)); return;
}
CorePS::SetAsyncSignalDumpDirectory(lock, Some(mozUploadDirFile));
} else {
LOG("Defaulting to the user's Download directory for profile dumps");
nsCOMPtr<nsIFile> tDownloadDir;
rv = NS_GetSpecialDirectory(NS_OS_DEFAULT_DOWNLOAD_DIR,
getter_AddRefs(tDownloadDir)); if (NS_FAILED(rv)) {
LOG("Failed to find download directory. Profiler signal handling will " "not be able to save to disk. Error: %s",
GetStaticErrorName(rv));
} else {
CorePS::SetAsyncSignalDumpDirectory(lock, Some(tDownloadDir));
}
} #endif
}
if (!ActivePS::Exists(lock)) { return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { // Not paused yet, so this is the first pause, let Java know. // TODO: Distinguish Pause and PauseSampling in Java.
java::GeckoJavaSampler::PauseSampling();
} #endif
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPaused();
NotifyObservers("profiler-paused"); return promise;
}
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { // Not paused anymore, so this is the last unpause, let Java know. // TODO: Distinguish Unpause and UnpauseSampling in Java.
java::GeckoJavaSampler::UnpauseSampling();
} #endif
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumed();
NotifyObservers("profiler-resumed");
if (!ActivePS::Exists(lock)) { return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { // Not paused yet, so this is the first pause, let Java know. // TODO: Distinguish Pause and PauseSampling in Java.
java::GeckoJavaSampler::PauseSampling();
} #endif
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPausedSampling();
NotifyObservers("profiler-paused-sampling"); return promise;
}
#ifdefined(GP_OS_android) if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { // Not paused anymore, so this is the last unpause, let Java know. // TODO: Distinguish Unpause and UnpauseSampling in Java.
java::GeckoJavaSampler::UnpauseSampling();
} #endif
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumedSampling();
NotifyObservers("profiler-resumed-sampling"); return promise;
}
bool profiler_feature_active(uint32_t aFeature) { // This function runs both on and off the main thread.
MOZ_RELEASE_ASSERT(CorePS::Exists());
// This function is hot enough that we use RacyFeatures, not ActivePS. return RacyFeatures::IsActiveWithFeature(aFeature);
}
bool profiler_active_without_feature(uint32_t aFeature) { // This function runs both on and off the main thread.
// This function is hot enough that we use RacyFeatures, not ActivePS. return RacyFeatures::IsActiveWithoutFeature(aFeature);
}
void profiler_count_bandwidth_bytes(int64_t aCount) {
NS_ASSERTION(profiler_feature_active(ProfilerFeature::Bandwidth), "Should not call profiler_count_bandwidth_bytes when the " "Bandwidth feature is not set");
ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter(); if (MOZ_UNLIKELY(!counter)) {
counter = new ProfilerBandwidthCounter();
CorePS::SetBandwidthCounter(counter);
}
// This will call `ThreadRegistry::Register()` (see below). return ThreadRegistration::RegisterThread(aName, aGuessStackTop);
}
/* static */ void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) { // Set the thread name (except for the main thread, which is controlled // elsewhere, and influences the process name on some systems like Linux). if (!aOnThreadRef.UnlockedConstReaderCRef().Info().IsMainThread()) { // Make sure we have a nsThread wrapper for the current thread, and that // NSPR knows its name.
(void)NS_GetCurrentThread();
NS_SetCurrentThreadName(
aOnThreadRef.UnlockedConstReaderCRef().Info().Name());
}
void profiler_unregister_thread() { // This will call `ThreadRegistry::Unregister()` (see below).
ThreadRegistration::UnregisterThread();
}
staticvoid locked_unregister_thread(
PSLockRef lock, ThreadRegistration::OnThreadRef aOnThreadRef) { if (!CorePS::Exists()) { // This function can be called after the main thread has already shut // down. return;
}
// We don't call StopJSSampling() here; there's no point doing that for a JS // thread that is in the process of disappearing.
// When a Browsing context is first loaded, the first url loaded in it will be // about:blank. Because of that, this call keeps the first non-about:blank // registration of window and discards the previous one.
RefPtr<PageInformation> pageInfo = new PageInformation(
aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID, aIsPrivateBrowsing);
CorePS::AppendRegisteredPage(lock, std::move(pageInfo));
// After appending the given page to CorePS, look for the expired // pages and remove them if there are any. if (ActivePS::Exists(lock)) {
ActivePS::DiscardExpiredPages(lock);
}
}
if (!CorePS::Exists()) { // This function can be called after the main thread has already shut down. return;
}
// During unregistration, if the profiler is active, we have to keep the // page information since there may be some markers associated with the given // page. But if profiler is not active. we have no reason to keep the // page information here because there can't be any marker associated with it. if (ActivePS::Exists(lock)) {
ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
} else {
CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
}
}
// Quick is-active and feature check before allocating a buffer. // If NoMarkerStacks is set, we don't want to capture a backtrace. if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) { return nullptr;
}
auto buffer = MakeUnique<ProfileChunkedBuffer>(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
MakeUnique<ProfileBufferChunkManagerSingle>(
ProfileBufferChunkManager::scExpectedMaximumStackSize));
if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) { return nullptr;
}
bool profiler_is_locked_on_current_thread() { // This function is used to help users avoid calling `profiler_...` functions // when the profiler may already have a lock in place, which would prevent a // 2nd recursive lock (resulting in a crash or a never-ending wait), or a // deadlock between any two mutexes. So we must return `true` for any of: // - The main profiler mutex, used by most functions, and/or // - The buffer mutex, used directly in some functions without locking the // main mutex, e.g., marker-related functions. // - The ProfilerParent or ProfilerChild mutex, used to store and process // buffer chunk updates. return PSAutoLock::IsLockedOnCurrentThread() ||
ThreadRegistry::IsRegistryMutexLockedOnCurrentThread() ||
ThreadRegistration::IsDataMutexLockedOnCurrentThread() ||
profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread() ||
ProfilerParent::IsLockedOnCurrentThread() ||
ProfilerChild::IsLockedOnCurrentThread();
}
void profiler_set_js_context(CycleCollectedJSContext* aCx) {
MOZ_ASSERT(aCx);
ThreadRegistration::WithOnThreadRef(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) { // The profiler mutex must be locked before the ThreadRegistration's.
PSAutoLock lock;
aOnThreadRef.WithLockedRWOnThread(
[&](ThreadRegistration::LockedRWOnThread& aThreadData) {
aThreadData.SetCycleCollectedJSContext(aCx);
if (!ActivePS::Exists(lock) || !ActivePS::FeatureJS(lock)) { return;
}
if (ProfiledThreadData* profiledThreadData =
aThreadData.GetProfiledThreadData(lock);
profiledThreadData) {
profiledThreadData->NotifyReceivedJSContext(
ActivePS::Buffer(lock).BufferRangeEnd()); if (ActivePS::FeatureTracing(lock)) {
aCx->BeginExecutionTracingAsync();
}
}
});
});
// This call is on-thread, so we can call PollJSSampling() to start JS // sampling immediately.
PollJSSamplingForCurrentThread();
}
// The profiler mutex must be locked before the ThreadRegistration's.
{
PSAutoLock lock;
ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData =
aOnThreadRef.GetLockedRWOnThread();
ProfiledThreadData* profiledThreadData =
lockedThreadData->GetProfiledThreadData(lock); if (!(profiledThreadData && ActivePS::Exists(lock) &&
ActivePS::FeatureJS(lock))) { // This thread is not being profiled or JS profiling is off, we only // need to clear the context pointer.
lockedThreadData->ClearCycleCollectedJSContext(); return;
}
if (ActivePS::FeatureTracing(lock)) {
cccx->EndExecutionTracingAsync();
}
// Notify the JS context that profiling for this context has // stopped. Do this by calling StopJSSampling and PollJSSampling // before nulling out the JSContext.
lockedThreadData->StopJSSampling();
}
// Drop profiler mutex for call into JS engine. This must happen before // ClearCycleCollectedJSContext below.
PollJSSamplingForCurrentThread();
// Tell the thread that we'd like to have JS sampling on this // thread again, once it gets a new JSContext (if ever).
lockedThreadData->StartJSSampling(ActivePS::JSFlags(lock));
}
});
}
if (info.IsMainThread()) {
aCollector.SetIsMainThread();
}
// Allocate the space for the native stack
NativeStack nativeStack{.mCount = 0};
auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) { // The target thread is now suspended. Collect a native backtrace, // and call the callback.
StackWalkControl* stackWalkControlIfSupported = nullptr; #ifdefined(HAVE_FASTINIT_NATIVE_UNWIND)
StackWalkControl stackWalkControl; if constexpr (StackWalkControl::scIsSupported) { if (aSampleNative) {
stackWalkControlIfSupported = &stackWalkControl;
}
} #endif const uint32_t jsFramesCount =
aJsFrames ? ExtractJsFrames(!aLockIfAsynchronousSampling, aThreadData,
aRegs, aCollector, aJsFrames,
stackWalkControlIfSupported)
: 0;
#ifdefined(HAVE_FASTINIT_NATIVE_UNWIND) if (aSampleNative) { // We can only use FramePointerStackWalk or MozStackWalk from // suspend_and_sample_thread as other stackwalking methods may not be // initialized. # ifdefined(USE_FRAME_POINTER_STACK_WALK)
DoFramePointerBacktrace(aThreadData, aRegs, nativeStack,
stackWalkControlIfSupported); # elif defined(USE_MOZ_STACK_WALK)
DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack,
stackWalkControlIfSupported); # else # error "Invalid configuration" # endif
if (!aLockIfAsynchronousSampling) { // Sampling the current thread, do NOT suspend it!
Registers regs; #ifdefined(HAVE_NATIVE_UNWIND)
REGISTERS_SYNC_POPULATE(regs); #else
regs.Clear(); #endif
collectStack(regs, TimeStamp::Now());
} else { // Suspend, sample, and then resume the target thread.
Sampler sampler(*aLockIfAsynchronousSampling);
TimeStamp now = TimeStamp::Now();
sampler.SuspendAndSampleAndResumeThread(*aLockIfAsynchronousSampling,
aThreadData, now, collectStack);
// NOTE: Make sure to disable the sampler before it is destroyed, in // case the profiler is running at the same time.
sampler.Disable(*aLockIfAsynchronousSampling);
}
}
// NOTE: aCollector's methods will be called while the target thread is paused. // Doing things in those methods like allocating -- which may try to claim // locks -- is a surefire way to deadlock. void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
uint32_t aFeatures,
ProfilerStackCollector& aCollector, bool aSampleNative /* = true */) { if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) { // Sampling the current thread. Get its information from the TLS (no locking // required.)
ThreadRegistration::WithOnThreadRef(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
[&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
aThreadData) { if (!aThreadData.GetJSContext()) { // No JSContext, there is no JS frame buffer (and no need for // it).
profiler_suspend_and_sample_thread( /* aLockIfAsynchronousSampling = */ nullptr, aThreadData, /* aJsFrames = */ nullptr, aFeatures, aCollector,
aSampleNative);
} else { // JSContext is present, we need to lock the thread data to // access the JS frame buffer.
aOnThreadRef.WithConstLockedRWOnThread(
[&](const ThreadRegistration::LockedRWOnThread&
aLockedThreadData) {
profiler_suspend_and_sample_thread( /* aLockIfAsynchronousSampling = */ nullptr,
aThreadData, aLockedThreadData.GetJsFrameBuffer(),
aFeatures, aCollector, aSampleNative);
});
}
});
});
} else { // Lock the profiler before accessing the ThreadRegistry.
PSAutoLock lock;
ThreadRegistry::WithOffThreadRef(
aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) {
aOffThreadRef.WithLockedRWFromAnyThread(
[&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
aThreadData) {
JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
profiler_suspend_and_sample_thread(&lock, aThreadData, jsFrames,
aFeatures, aCollector,
aSampleNative);
});
});
}
}
// END externally visible functions ////////////////////////////////////////////////////////////////////////
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.269 Sekunden
(vorverarbeitet am 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.