/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler. // // - A "periodic" sample is the most complex kind. It is done in response to a // timer while the profiler is active. It involves writing a stack trace plus // a variety of other values (memory measurements, responsiveness // measurements, markers, etc.) into the main ProfileBuffer. The sampling is // done from off-thread, and so SuspendAndSampleAndResumeThread() is used to // get the register values. // // - A "synchronous" sample is a simpler kind. It is done in response to an API // call (profiler_get_backtrace()). It involves writing a stack trace and // little else into a temporary ProfileBuffer, and wrapping that up in a // ProfilerBacktrace that can be subsequently used in a marker. The sampling // is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the // register values. // // - A "backtrace" sample is the simplest kind. It is done in response to an // API call (profiler_suspend_and_sample_thread()). It involves getting a // stack trace via a ProfilerStackCollector; it does not write to a // ProfileBuffer. The sampling is done from off-thread, and so uses // SuspendAndSampleAndResumeThread() to get the register values.
// To simplify other code in this file, define a helper definition to avoid // repeating the same preprocessor checks.
// The signals that we use to control the profiler conflict with the signals // used to control the code coverage tool. Therefore, if coverage is enabled, // we need to disable our own signal handling mechanisms. #ifndef MOZ_CODE_COVERAGE # ifdef XP_WIN // TODO: Add support for windows "signal"-like behaviour. See Bug 1867328. # elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \ defined(GP_OS_android) || defined(GP_OS_freebsd) // Specify the specific platforms that we want to support # define GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL 1 # else // No support on this unknown platform! # endif #endif
// We need some extra includes if we're supporting async posix signals #ifdefined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL) # include <signal.h> # include <fcntl.h> # include <unistd.h> # include <errno.h> # include <pthread.h> #endif
#ifdefined(GP_OS_android) # include "JavaExceptions.h" # include "mozilla/java/GeckoJavaSamplerNatives.h" # include "mozilla/jni/Refs.h" #endif
#ifdefined(XP_MACOSX) # include "nsCocoaFeatures.h" #endif
#ifdefined(GP_PLAT_amd64_darwin) # include <cpuid.h> #endif
#if defined(GP_OS_windows)
#  include <processthreadsapi.h>

// GetThreadInformation is not available on Windows 7.
// Declare the prototype ourselves so we can link against it when present.
WINBASEAPI BOOL WINAPI GetThreadInformation(
    _In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass,
    _Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation,
    _In_ DWORD ThreadInformationSize);
#endif
// Win32 builds always have frame pointers, so FramePointerStackWalk() always // works. #ifdefined(GP_PLAT_x86_windows) # define HAVE_NATIVE_UNWIND # define USE_FRAME_POINTER_STACK_WALK #endif
// Win64 builds always omit frame pointers, so we use the slower // MozStackWalk(), which works in that case. #ifdefined(GP_PLAT_amd64_windows) # define HAVE_NATIVE_UNWIND # define USE_MOZ_STACK_WALK #endif
// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower // MozStackWalk(). #ifdefined(GP_PLAT_arm64_windows) # define HAVE_NATIVE_UNWIND # define USE_MOZ_STACK_WALK #endif
// Mac builds use FramePointerStackWalk(). Even if we build without // frame pointers, we'll still get useful stacks in system libraries // because those always have frame pointers. // We don't use MozStackWalk() on Mac. #ifdefined(GP_OS_darwin) # define HAVE_NATIVE_UNWIND # define USE_FRAME_POINTER_STACK_WALK #endif
// Android builds use the ARM Exception Handling ABI to unwind. #ifdefined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) # define HAVE_NATIVE_UNWIND # define USE_EHABI_STACKWALK # include "EHABIStackWalk.h" #endif
// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks. #ifdefined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \ defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \ defined(GP_PLAT_arm64_freebsd) # define HAVE_NATIVE_UNWIND # define USE_LUL_STACKWALK # include "lul/LulMain.h" # include "lul/platform-linux-lul.h"
// On linux we use LUL for periodic samples and synchronous samples, but we use // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled. // (See the comment at the top of the file for a definition of // periodic/synchronous/backtrace.). // // FramePointerStackWalk can produce incomplete stacks when the current entry is // in a shared library without framepointers, however LUL can take a long time // to initialize, which is undesirable for consumers of // profiler_suspend_and_sample_thread like the Background Hang Reporter. # ifdefined(MOZ_PROFILING) # define USE_FRAME_POINTER_STACK_WALK # endif #endif
// We can only stackwalk without expensive initialization on platforms which // support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires // initializing LUL, and EHABIStackWalk requires initializing EHABI, both of // which can be expensive. #ifdefined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) # define HAVE_FASTINIT_NATIVE_UNWIND #endif
using mozilla::profiler::detail::RacyFeatures;
using ThreadRegistration = mozilla::profiler::ThreadRegistration;
using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo;
using ThreadRegistry = mozilla::profiler::ThreadRegistry;

// The log module used by the profiler's LOG()/DEBUG_LOG() output.
LazyLogModule gProfilerLog("prof");
// Returns the singleton core profile buffer shared with the Base Profiler.
ProfileChunkedBuffer& profiler_get_core_buffer() {
  // Defer to the Base Profiler in mozglue to create the core buffer if needed,
  // and keep a reference here, for quick access in xul.
  static ProfileChunkedBuffer& sProfileChunkedBuffer =
      baseprofiler::profiler_get_core_buffer();
  return sProfileChunkedBuffer;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Control character to start the profiler ('g' for "go"!)
static const char sAsyncSignalControlCharStart = 'g';
// Control character to stop the profiler ('s' for "stop"!)
static const char sAsyncSignalControlCharStop = 's';

// This is a file descriptor that is the "write" end of the POSIX pipe that we
// use to start the profiler. It is written to in profiler_start_signal_handler
// and read from in AsyncSignalControlThread
static mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed>
    sAsyncSignalControlWriteFd(-1);

// Atomic flag to stop the profiler from within the sampling loop
mozilla::Atomic<bool, mozilla::MemoryOrdering::Relaxed> gStopAndDumpFromSignal(
    false);
#endif
// Forward declare the function to call when we need to dump + stop from within // the async control thread void profiler_dump_and_stop(); // Forward declare the function to call when we need to start the profiler. void profiler_start_from_signal();
uint32_t features = 0;
features = ParseFeaturesFromStringArray(featureStringArray.begin(),
featureStringArray.length());
// 128 * 1024 * 1024 is the entries preset that is given in // devtools/client/performance-new/shared/background.sys.mjs
profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features,
filtersTemp.begin(), filtersTemp.length(), 0, Nothing());
}
// Add all the possible features.
PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration. #if !defined(GP_OS_android)
ProfilerFeature::ClearJava(features); #endif #if !defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features); #endif #ifdefined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) if (getenv("XPCOM_MEM_BLOAT_LOG")) {
DEBUG_LOG("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations."); // The memory hooks are available, but the bloat log is enabled, which is // not compatible with the native allocations tracking. See the comment in // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for // more information.
ProfilerFeature::ClearNativeAllocations(features);
} #else // The memory hooks are not available.
ProfilerFeature::ClearNativeAllocations(features); #endif #if !defined(MOZ_MEMORY) or !defined(MOZ_PROFILER_MEMORY)
ProfilerFeature::ClearMemory(features); #endif
// Default features common to all contexts (even if not available).
static constexpr uint32_t DefaultFeatures() {
  return ProfilerFeature::Java | ProfilerFeature::JS |
         ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
         ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU;
}
// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static constexpr uint32_t StartupExtraDefaultFeatures() {
  // Enable file I/Os by default for startup profiles as startup is heavy on
  // I/O operations.
  return ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages;
}
Json::String ToCompactString(const Json::Value& aJsonValue) {
Json::StreamWriterBuilder builder; // No indentations, and no newlines.
builder["indentation"] = ""; // This removes spaces after colons.
builder["enableYAMLCompatibility"] = false; // Only 6 digits after the decimal point; timestamps in ms have ns precision.
builder["precision"] = 6;
builder["precisionType"] = "decimal";
// RAII class to lock the profiler mutex. // It provides a mechanism to determine if it is locked or not in order for // memory hooks to avoid re-entering the profiler locked state. // Locking order: Profiler, ThreadRegistry, ThreadRegistration. class MOZ_RAII PSAutoLock { public:
  // Locks gPSMutex for the lifetime of this object. In DEBUG builds, first
  // verifies (via the immediately-invoked lambda) that none of the
  // lower-ranked mutexes are already held by this thread.
  PSAutoLock()
      : mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& {
          // In DEBUG builds, *before* we attempt to lock gPSMutex, we want to
          // check that the ThreadRegistry, ThreadRegistration, and ProfilingLog
          // mutexes are *not* locked on this thread, to avoid inversion
          // deadlocks.
          MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread());
          MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
          MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread());
          return gPSMutex;
        }()) {}
// ASYNC POSIX SIGNAL HANDLING SUPPORT // // Integrating POSIX signals // (https://man7.org/linux/man-pages/man7/signal.7.html) into a complex // multi-threaded application such as Firefox can be a tricky proposition. // Signals are delivered by the operating system to a program, which then // invokes a signal handler // (https://man7.org/linux/man-pages/man2/sigaction.2.html) outside the normal // flow of control. This handler is responsible for performing operations in // response to the signal. If there is no "custom" handler defined, then default // behaviour is triggered, which usually results in a terminated program. // // As signal handlers interrupt the normal flow of control, Firefox may not be // in a safe state while the handler is running (e.g. it may be halfway through // a garbage collection cycle, or a critical lock may be held by the current // thread). This is something we must be aware of while writing one, and we are // additionally limited in terms of which POSIX functions we can call to those // which are async signal safe // (https://man7.org/linux/man-pages/man7/signal-safety.7.html). // // In the context of Firefox, this presents a number of details that we must be // aware of: // // * We are very limited by what we can call when we handle a signal: Many // functions in Firefox, and in the profiler specifically, allocate memory // when called. Allocating memory is specifically **not** async-signal-safe, // and so any functions that allocate should not be called from a signal // handler. // // * We need to be careful with how we communicate to other threads in the // process. The signal handler runs asynchronously, interrupting the current // thread of execution. Communication should therefore use atomics or other // concurrency constructs to ensure that data is read and written correctly. // We should avoid taking locks, as we may easily deadlock while within the // signal handler. 
//
// * We cannot use the usual Firefox mechanisms for triggering behaviour in
// other threads. For instance, tools such as ``NS_DispatchToMainThread``
// allocate memory when called, which is not allowed within a signal handler.
//
// We solve these constraints by introducing a new thread within the Firefox
// profiler, the AsyncSignalControlThread which is responsible for carrying out
// the actions triggered by a signal handler. We communicate between handlers
// and this thread with the use of a libc pipe
// (https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html#tag_16_685_08).
// Writing to a pipe is async-signal-safe, so we can do so from a signal
// handler, and we can set the pipe to be "blocking", meaning that when our
// control thread tries to read it will block at the OS level (consuming no CPU)
// until the handler writes to it. This is in contrast to (e.g.) an atomic
// variable, where our thread would have to "busy wait" for it to be set.
//
// We have one "control" thread per process, and use a single byte for messages
// we send. Writes to pipes are atomic if the size is less than or equal to
// ``PIPE_BUF``, which (although implementation defined) in our case is always
// one, thus trivially atomic.
//
// The control flow for a typical Firefox session in which a user starts and
// stops profiling using POSIX signals therefore looks something like the
// following:
//
// * Profiler initialization.
//
// * The main thread of each process starts the signal control thread, and
// initialises signal handlers for ``SIGUSR1`` and ``SIGUSR2``.
// * The signal control thread sets up pipes for communication, and begins
// reading, blocking itself.
//
// * *After some time...*
// * The user sends ``SIGUSR1`` to Firefox, e.g. using ``kill -s USR1 <firefox
// pid>``
//
// * The profiler_start_signal_handler signal handler for ``SIGUSR1`` is
// triggered by the operating system.
// This writes the "start" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "start" character, and starts the
// profiler with a set of default presets.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user uses Firefox, or waits for it to do something...*
// * The user sends ``SIGUSR2`` to Firefox, e.g. using ``kill -s USR2 <firefox
// pid>``
//
// * The profiler_stop_signal_handler signal handler for ``SIGUSR2`` is
// triggered by the operating system. This writes the "stop" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "stop" character, and calls
// profiler_dump_and_stop to dump the profile to disk.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user can now start another profiling session...*
//
// Forward declare this, so we can call it from the constructor. staticvoid* AsyncSignalControlThreadEntry(void* aArg);
// Define our platform specific async (posix) signal control thread here. class AsyncSignalControlThread { public:
AsyncSignalControlThread() : mThread() { // Try to open a pipe for this to communicate with. If we can't do this, // then we give up and return, as there's no point continuing without // being able to communicate int pipeFds[2]; if (pipe(pipeFds)) {
LOG("Profiler AsyncSignalControlThread failed to create a pipe."); return;
}
// Close this pipe on calls to exec().
fcntl(pipeFds[0], F_SETFD, FD_CLOEXEC);
fcntl(pipeFds[1], F_SETFD, FD_CLOEXEC);
// Write the reading side to mFd, and the writing side to the global atomic
mFd = pipeFds[0];
sAsyncSignalControlWriteFd = pipeFds[1];
// We don't really care about stack size, as it should be minimal, so // leave the pthread attributes as a nullptr, i.e. choose the default.
pthread_attr_t* attr_ptr = nullptr; if (pthread_create(&mThread, attr_ptr, AsyncSignalControlThreadEntry, this) != 0) {
MOZ_CRASH("pthread_create failed");
}
};
~AsyncSignalControlThread() { // Derived from code in nsDumpUtils.cpp. Comment reproduced here for // poisterity: Close sAsyncSignalControlWriteFd /after/ setting the fd to // -1. Otherwise we have the (admittedly far-fetched) race where we // // 1) close sAsyncSignalControlWriteFd // 2) open a new fd with the same number as sAsyncSignalControlWriteFd // had. // 3) receive a signal, then write to the fd. int asyncSignalControlWriteFd = sAsyncSignalControlWriteFd.exchange(-1); // This will unblock the "read" in StartWatching.
close(asyncSignalControlWriteFd); // Finally, exit the thread.
pthread_join(mThread, nullptr);
};
void Watch() { char msg[1];
ssize_t nread; while (true) { // Try reading from the pipe. This will block until something is written:
nread = read(mFd, msg, sizeof(msg));
if (nread == -1 && errno == EINTR) { // nread == -1 and errno == EINTR means that `read` was interrupted // by a signal before reading any data. This is likely because the // profiling thread interrupted us (with SIGPROF). We can safely ignore // this and "go around" the loop again to try and read. continue;
}
if (nread == -1 && errno != EINTR) { // nread == -1 and errno != EINTR means that `read` has failed in some // way that we can't recover from. In this case, all we can do is give // up, and quit the watcher, as the pipe is likely broken.
LOG("Error (%d) when reading in AsyncSignalControlThread", errno); return;
}
if (nread == 0) { // nread == 0 signals that the other end of the pipe has been cleanly // closed. Close our end, and exit the reading loop.
close(mFd); return;
}
// If we reach here, nread != 0 and nread != -1. This means that we // should have read at least one byte, which should be a control byte // for the profiler. // It *might* happen that `read` is interrupted by the sampler thread // after successfully reading. If this occurs, read returns the number // of bytes read. As anything other than 1 is wrong for us, we can // always assume that we can read whatever `read` read.
MOZ_RELEASE_ASSERT(nread == 1);
if (msg[0] == sAsyncSignalControlCharStart) { // Check to see if the profiler is already running. This is done within // `profiler_start` anyway, but if we check sooner we avoid running all // the other code between now and that check. if (!profiler_is_active()) {
profiler_start_from_signal();
}
} elseif (msg[0] == sAsyncSignalControlCharStop) { // Check to see whether the profiler is even running before trying to // stop the profiler. Most other methods of stopping the profiler (i.e. // those through nsProfiler etc) already know whether or not the // profiler is running, so don't try and stop it if it's already // running. Signal-stopping doesn't have this constraint, so we should // check just in case there is a codepath followed by // `profiler_dump_and_stop` that breaks if we stop while stopped. if (profiler_is_active()) {
profiler_dump_and_stop();
}
} else {
LOG("AsyncSignalControlThread recieved unknown control signal: %c",
msg[0]);
}
}
};
private: // The read side of the pipe that we use to communicate from a signal handler // to the AsyncSignalControlThread int mFd;
// The thread handle for the async signal control thread // Note, that unlike the sampler thread, this is currently a posix-only // feature. Therefore, we don't bother to have a windows equivalent - we // just use a pthread_t
pthread_t mThread;
};
// All functions in this file can run on multiple threads unless they have an // NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is // valid even when the profiler is not active. Most profile operations can't do // anything useful when this class is not instantiated, so we release-assert // its non-nullness in all such operations. // // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a // PSAutoLock reference as an argument as proof that the gPSMutex is currently // locked. This makes it clear when gPSMutex is locked and helps avoid // accidental unlocked accesses to global state. There are ways to circumvent // this mechanism, but please don't do so without *very* good reason and a // detailed explanation. // // The exceptions to this rule: // // - mProcessStartTime, because it's immutable; class CorePS { private: #ifdef MOZ_PERFETTO class PerfettoObserver : public perfetto::TrackEventSessionObserver { public:
PerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(this); }
~PerfettoObserver() { perfetto::TrackEvent::RemoveSessionObserver(this); }
  // Constructs the core (always-alive) profiler state. Must only be created
  // once, on the main thread, very early during startup.
  CorePS()
      : mProcessStartTime(TimeStamp::ProcessCreation()),
        mMaybeBandwidthCounter(nullptr)
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
        ,
        mAsyncSignalControlThread(nullptr)
#endif
#ifdef USE_LUL_STACKWALK
        ,
        mLul(nullptr)
#endif
  {
    MOZ_ASSERT(NS_IsMainThread(),
               "CorePS must be created from the main thread");
  }
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex // being locked. This is because CorePS is instantiated so early on the main // thread that we don't have to worry about it being racy. staticbool Exists() { return !!sInstance; }
for (auto& registeredPage : sInstance->mRegisteredPages) {
aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
}
// Measurement of the following things may be added later if DMD finds it // is worthwhile: // - CorePS::mRegisteredPages itself (its elements' children are // measured above)
auto foundPageIter = std::find_if(
sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
RegisteredPageComparator{aRegisteredPage.get()});
if (foundPageIter != sInstance->mRegisteredPages.end()) { if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) { // When a BrowsingContext is loaded, the first url loaded in it will be // about:blank, and if the principal matches, the first document loaded // in it will share an inner window. That's why we should delete the // intermittent about:blank if they share the inner window.
sInstance->mRegisteredPages.erase(foundPageIter);
} else { // Do not register the same page again. return;
}
}
  // Registers a counter to be sampled. Requires the profiler lock (proven by
  // the PSLockRef argument).
  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
    MOZ_ASSERT(sInstance);
    // we don't own the counter; they may be stored in static objects
    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
  }
staticvoid RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { // we may be called to remove a counter after the profiler is stopped or // late in shutdown. if (sInstance) { auto* counter = std::find(sInstance->mCounters.begin(),
sInstance->mCounters.end(), aCounter);
MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
sInstance->mCounters.erase(counter);
}
}
 private:
  // The singleton instance
  static CorePS* sInstance;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // Network bandwidth counter for the Bandwidth feature.
  ProfilerBandwidthCounter* mMaybeBandwidthCounter;

  // Info on all the registered pages.
  // InnerWindowIDs in mRegisteredPages are unique.
  Vector<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters
  Vector<BaseProfilerCount*> mCounters;

#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
  // Background thread for communicating with async signal handlers
  AsyncSignalControlThread* mAsyncSignalControlThread;
#endif

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  // Owned by this CorePS.
  mozilla::Atomic<lul::LUL*> mLul;
#endif

  // Process name, provided by child process initialization code.
  nsAutoCString mProcessName;
  // Private name, provided by child process initialization code (eTLD+1 in
  // fission)
  nsAutoCString mETLDplus1;

  // This memory buffer is used by the MergeStacks mechanism. Previously it was
  // stack allocated, but this led to a stack overflow, as it was too much
  // memory. Here the buffer can be pre-allocated, and shared with the
  // MergeStacks feature as needed. MergeStacks is only run while holding the
  // lock, so it is safe to have only one instance allocated for all of the
  // threads.
  JsFrameBuffer mJsFrames;

  // Cached download directory for when we need to dump profiles to disk.
#if !defined(XP_WIN)
  Maybe<nsCOMPtr<nsIFile>> mAsyncSignalDumpDirectory;
#endif
};
// Removes a sampled counter while the profiler lock is held (proven by the
// PSLockRef argument). Thin wrapper around CorePS::RemoveCounter.
void locked_profiler_remove_sampled_counter(PSLockRef aLock,
                                            BaseProfilerCount* aCounter) {
  // Note: we don't enforce a final sample, though we could do so if the
  // profiler was active
  CorePS::RemoveCounter(aLock, aCounter);
}
// This class contains the profiler's global state that is valid only when the // profiler is active. When not instantiated, the profiler is inactive. // // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as // CorePS. // class ActivePS { private:
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
scBytesPerEntry / scMinimumNumberOfChunks,
size_t(scMaximumChunkSize)));
}
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { // Filter out any features unavailable in this platform/configuration.
aFeatures &= AvailableFeatures();
// Some features imply others. if (aFeatures & ProfilerFeature::FileIOAll) {
aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
} elseif (aFeatures & ProfilerFeature::FileIO) {
aFeatures |= ProfilerFeature::MainThreadIO;
}
if (aFeatures & ProfilerFeature::CPUAllThreads) {
aFeatures |= ProfilerFeature::CPUUtilization;
}
  // Constructs the active-profiling state: adjusts the requested features,
  // sets up the chunk manager and profile buffer, copies/lower-cases the
  // thread-name filters, optionally registers the IO interposer and the
  // power/CPU-frequency counters, and creates the sampler thread.
  ActivePS(
      PSLockRef aLock, const TimeStamp& aProfilingStartTime,
      PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
      const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
      const Maybe<double>& aDuration,
      UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull)
      : mProfilingStartTime(aProfilingStartTime),
        mGeneration(sNextGeneration++),
        mCapacity(aCapacity),
        mDuration(aDuration),
        mInterval(aInterval),
        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
        mActiveTabID(aActiveTabID),
        // Take the caller-provided chunk manager if any, otherwise create one
        // sized for the requested capacity.
        mProfileBufferChunkManager(
            aChunkManagerOrNull
                ? std::move(aChunkManagerOrNull)
                : MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
                      size_t(ClampToAllowedEntries(aCapacity.Value())) *
                          scBytesPerEntry,
                      ChunkSizeForEntries(aCapacity.Value()))),
        // The profile buffer is the shared core buffer, pointed at our chunk
        // manager.
        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
          ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer();
          coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager);
          return coreBuffer;
        }()),
        mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
                                    ? new ProcessCPUCounter(aLock)
                                    : nullptr),
        mMaybePowerCounters(nullptr),
        mMaybeCPUFreq(nullptr),
        // The new sampler thread doesn't start sampling immediately because
        // the main loop within Run() is blocked until this function's caller
        // unlocks gPSMutex.
        mSamplerThread(
            NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
        mIsPaused(false),
        mIsSamplingPaused(false) {
    ProfilingLog::Init();

    // Deep copy and lower-case aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
      mFiltersLowered[i].reserve(mFilters[i].size());
      std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
                     std::back_inserter(mFiltersLowered[i]), ::tolower);
    }

#if !defined(RELEASE_OR_BETA)
    if (ShouldInterposeIOs()) {
      // We need to register the observer on the main thread, because we want
      // to observe IO that happens on the main thread.
      // IOInterposer needs to be initialized before calling
      // IOInterposer::Register or our observer will be silently dropped.
      if (NS_IsMainThread()) {
        IOInterposer::Init();
        IOInterposer::Register(IOInterposeObserver::OpAll,
                               &ProfilerIOInterposeObserver::GetInstance());
      } else {
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::ActivePS", []() {
              // Note: This could theoretically happen after ActivePS gets
              // destroyed, but it's ok:
              // - The Observer always checks that the profiler is (still)
              //   active before doing its work.
              // - The destruction should happen on the same thread as this
              //   construction, so the un-registration will also be
              //   dispatched and queued on the main thread, and run after
              //   this.
              IOInterposer::Init();
              IOInterposer::Register(
                  IOInterposeObserver::OpAll,
                  &ProfilerIOInterposeObserver::GetInstance());
            }));
      }
    }
#endif

    if (ProfilerFeature::HasPower(aFeatures)) {
      mMaybePowerCounters = new PowerCounters();
      for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) {
        locked_profiler_add_sampled_counter(aLock, powerCounter.get());
      }
    }

    if (ProfilerFeature::HasCPUFrequency(aFeatures)) {
      mMaybeCPUFreq = new ProfilerCPUFreq();
    }
  }
  // Tears down the active-profiling state. The counters must already have
  // been deleted by the code that stopped the profiler; this only unregisters
  // the IO interposer, releases the chunk manager from the core buffer, and
  // shuts down the profiling log.
  ~ActivePS() {
    MOZ_ASSERT(
        !mMaybeProcessCPUCounter,
        "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
    MOZ_ASSERT(
        !mMaybePowerCounters,
        "mMaybePowerCounters should have been deleted before ~ActivePS()");
    MOZ_ASSERT(!mMaybeCPUFreq,
               "mMaybeCPUFreq should have been deleted before ~ActivePS()");
#if defined(MOZ_MEMORY) && defined(MOZ_PROFILER_MEMORY)
    MOZ_ASSERT(!mMemoryCounter,
               "mMemoryCounter should have been deleted before ~ActivePS()");
#endif

#if !defined(RELEASE_OR_BETA)
    if (ShouldInterposeIOs()) {
      // We need to unregister the observer on the main thread, because that's
      // where we've registered it.
      if (NS_IsMainThread()) {
        IOInterposer::Unregister(IOInterposeObserver::OpAll,
                                 &ProfilerIOInterposeObserver::GetInstance());
      } else {
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::~ActivePS", []() {
              IOInterposer::Unregister(
                  IOInterposeObserver::OpAll,
                  &ProfilerIOInterposeObserver::GetInstance());
            }));
      }
    }
#endif
    if (mProfileBufferChunkManager) {
      // We still control the chunk manager, remove it from the core buffer.
      profiler_get_core_buffer().ResetChunkManager();
    }

    ProfilingLog::Destroy();
  }
bool ThreadSelected(constchar* aThreadName) { if (mFiltersLowered.empty()) { returntrue;
}
std::string name = aThreadName;
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
for (constauto& filter : mFiltersLowered) { if (filter == "*") { returntrue;
}
// Crude, non UTF-8 compatible, case insensitive substring search if (name.find(filter) != std::string::npos) { returntrue;
}
// If the filter is "pid:<my pid>", profile all threads. if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { returntrue;
}
}
ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter(); if (counter && counter->IsRegistered()) { // Because profiler_count_bandwidth_bytes does a racy // profiler_feature_active check to avoid taking the lock, // free'ing the memory of the counter would be crashy if the // socket thread attempts to increment the counter while we are // stopping the profiler. // Instead, we keep the counter in CorePS and only mark it as // unregistered so that the next attempt to count bytes // will re-register it.
locked_profiler_remove_sampled_counter(aLock, counter);
counter->MarkUnregistered();
}
auto samplerThread = sInstance->mSamplerThread; delete sInstance;
sInstance = nullptr;
n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it // is worthwhile: // - mLiveProfiledThreads (both the array itself, and the contents) // - mDeadProfiledThreads (both the array itself, and the contents) //
return n;
}
// Determines which profiling feature classes apply to the given thread:
// threads matching the user's filters get everything; other threads only
// get the feature classes that were requested for *all* threads.
static ThreadProfilingFeatures ProfilingFeaturesForThread(
    PSLockRef aLock, const ThreadRegistrationInfo& aInfo) {
  MOZ_ASSERT(sInstance);

  // A thread explicitly selected by the user gets everything recorded.
  if (sInstance->ThreadSelected(aInfo.Name())) {
    return ThreadProfilingFeatures::Any;
  }

  // Otherwise, accumulate only the "all threads" feature classes.
  ThreadProfilingFeatures granted = ThreadProfilingFeatures::NotProfiled;
  if (ActivePS::FeatureCPUAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::CPUUtilization);
  }
  if (ActivePS::FeatureSamplingAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::Sampling);
  }
  if (ActivePS::FeatureMarkersAllThreads(aLock)) {
    granted = Combine(granted, ThreadProfilingFeatures::Markers);
  }
  return granted;
}
// Writes out the current active configuration of the profile. staticvoid WriteActiveConfiguration(
PSLockRef aLock, JSONWriter& aWriter, const Span<constchar>& aPropertyName = MakeStringSpan("")) { if (!sInstance) { if (!aPropertyName.empty()) {
aWriter.NullProperty(aPropertyName);
} else {
aWriter.NullElement();
} return;
};
if (!aPropertyName.empty()) {
aWriter.StartObjectProperty(aPropertyName);
} else {
aWriter.StartObjectElement();
}
PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES) #undef WRITE_ACTIVE_FEATURES
aWriter.EndArray();
}
{
aWriter.StartArrayProperty("threads"); for (constauto& filter : sInstance->mFilters) {
aWriter.StringElement(filter);
}
aWriter.EndArray();
}
{ // Now write all the simple values.
// The interval is also available on profile.meta.interval
aWriter.DoubleProperty("interval", sInstance->mInterval);
aWriter.IntProperty("capacity", sInstance->mCapacity.Value()); if (sInstance->mDuration) {
aWriter.DoubleProperty("duration", sInstance->mDuration.value());
} // Here, we are converting uint64_t to double. Tab IDs are // being created using `nsContentUtils::GenerateProcessSpecificId`, which // is specifically designed to only use 53 of the 64 bits to be lossless // when passed into and out of JS as a double.
aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
}
aWriter.EndObject();
}
// Not using PS_GET, because only the "Controlled" interface of // `mProfileBufferChunkManager` should be exposed here. static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
PSLockRef) {
MOZ_ASSERT(sInstance);
MOZ_ASSERT(sInstance->mProfileBufferChunkManager); return *sInstance->mProfileBufferChunkManager;
}
staticvoid FulfillChunkRequests(PSLockRef) {
MOZ_ASSERT(sInstance); if (sInstance->mProfileBufferChunkManager) {
sInstance->mProfileBufferChunkManager->FulfillChunkRequests();
}
}
// One entry per thread to be written into the profile, whether the thread
// is still registered or already dead. Field order matters: callers use
// positional aggregate initialization.
struct ProfiledThreadListElement {
  // When the thread was registered with the profiler.
  TimeStamp mRegisterTime;
  JSContext* mJSContext;  // Null for unregistered threads.
  ProfiledThreadData* mProfiledThreadData;
};

using ProfiledThreadList = Vector<ProfiledThreadListElement>;
// Returns a ProfiledThreadList with all threads that should be included in a // profile, both for threads that are still registered, and for threads that // have been unregistered but still have data in the buffer. // The returned array is sorted by thread register time. // Do not hold on to the return value past LockedRegistry. static ProfiledThreadList ProfiledThreads(
ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ProfiledThreadList array;
MOZ_RELEASE_ASSERT(
array.initCapacity(sInstance->mLiveProfiledThreads.length() +
sInstance->mDeadProfiledThreads.length()));
for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) {
ProfiledThreadData* profiledThreadData =
offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData(
aLock); if (!profiledThreadData) { // This thread was not profiled, continue with the next one. continue;
}
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
profiledThreadData->Info().RegisterTime(),
lockedThreadData->GetJSContext(), profiledThreadData}));
}
for (auto& t : sInstance->mDeadProfiledThreads) {
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
t->Info().RegisterTime(), (JSContext*)nullptr, t.get()}));
}
std::sort(array.begin(), array.end(),
[](const ProfiledThreadListElement& a, const ProfiledThreadListElement& b) { return a.mRegisterTime < b.mRegisterTime;
}); return array;
}
// Collects every page to include in the profile: the currently-registered
// pages plus pages that were unregistered but still have buffered data.
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
  MOZ_ASSERT(sInstance);

  Vector<RefPtr<PageInformation>> pages;
  for (auto& page : CorePS::RegisteredPages(aLock)) {
    MOZ_RELEASE_ASSERT(pages.append(page));
  }
  for (auto& page : sInstance->mDeadProfiledPages) {
    MOZ_RELEASE_ASSERT(pages.append(page));
  }
  // We don't need to sort the pages like threads since we won't show them
  // as a list.
  return pages;
}
// NOTE(review): The signature of this function was lost in extraction; it
// presumably takes `aProfiledThreadData` (and the profiler lock) and moves
// the matching live entry into mDeadProfiledThreads — confirm against the
// original file.
// Find the right entry in the mLiveProfiledThreads array and remove the // element, moving the ProfiledThreadData object for the thread into the // mDeadProfiledThreads array. for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; if (thread.mProfiledThreadData == aProfiledThreadData) {
// Record where the buffer ended when this thread went away.
thread.mProfiledThreadData->NotifyUnregistered(
sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
std::move(thread.mProfiledThreadData)));
sInstance->mLiveProfiledThreads.erase(
&sInstance->mLiveProfiledThreads[i]); return;
}
}
}
// NOTE(review): Only the constructor of this class is visible here; the
// remainder of the declaration (other members and the closing `};`) was
// lost in extraction — confirm against the original file.
// This is a counter to collect process CPU utilization during profiling. // It cannot be a raw `ProfilerCounter` because we need to manually add/remove // it while the profiler lock is already held. class ProcessCPUCounter final : public AtomicProfilerCount { public: explicit ProcessCPUCounter(PSLockRef aLock)
: AtomicProfilerCount("processCPU", &mCounter, nullptr, "CPU", "Process CPU utilization") { // Adding on construction, so it's ready before the sampler starts.
locked_profiler_add_sampled_counter(aLock, this); // Note: Removed from ActivePS::Destroy, because a lock is needed.
}
staticvoid ClearExpiredExitProfiles(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); // Discard exit profiles that were gathered before our buffer RangeStart. // If we have started to overwrite our data from when the Base profile was // added, we should get rid of that Base profile because it's now older than // our oldest Gecko profile data. // // When adding: (In practice the starting buffer should be empty) // v Start == End // | <-- Buffer range, initially empty. // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it // // Later, still in range: // v Start v End // |=========| <-- Buffer range growing. // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it // // Even later, now out of range: // v Start v End // |============| <-- Buffer range full and sliding. // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it if (sInstance->mBaseProfileThreads &&
sInstance->mGeckoIndexWhenBaseProfileAdded
.ConvertToProfileBufferIndex() <
profiler_get_core_buffer().GetState().mRangeStart) {
DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
sInstance->mBaseProfileThreads.get());
sInstance->mBaseProfileThreads.reset();
}
sInstance->mExitProfiles.eraseIf(
[bufferRangeStart](const ExitProfile& aExitProfile) { return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
});
}
// NOTE(review): The following lines are residual German website-disclaimer
// boilerplate introduced by text extraction, not part of the profiler
// source ("The information on this website was compiled carefully to the
// best of our knowledge. However, neither completeness, correctness, nor
// quality of the provided information is guaranteed. Note: the colored
// syntax highlighting is still experimental."). Preserved commented-out
// pending confirmation that it can be removed:
// Die Informationen auf dieser Webseite wurden nach bestem Wissen
// sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
// Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.
// Bemerkung: Die farbliche Syntaxdarstellung ist noch experimentell.