/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler. // // - A "periodic" sample is the most complex kind. It is done in response to a // timer while the profiler is active. It involves writing a stack trace plus // a variety of other values (memory measurements, responsiveness // measurements, etc.) into the main ProfileBuffer. The sampling is done from // off-thread, and so SuspendAndSampleAndResumeThread() is used to get the // register values. // // - A "synchronous" sample is a simpler kind. It is done in response to an API // call (profiler_get_backtrace()). It involves writing a stack trace and // little else into a temporary ProfileBuffer, and wrapping that up in a // ProfilerBacktrace that can be subsequently used in a marker. The sampling // is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the // register values. // // - A "backtrace" sample is the simplest kind. It is done in response to an // API call (profiler_suspend_and_sample_thread()). It involves getting a // stack trace via a ProfilerStackCollector; it does not write to a // ProfileBuffer. The sampling is done from off-thread, and so uses // SuspendAndSampleAndResumeThread() to get the register values.
// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds use FramePointerStackWalk(). Even if we build without
// frame pointers, we'll still get useful stacks in system libraries
// because those always have frame pointers.
// We don't use MozStackWalk() on Mac.
#if defined(GP_OS_darwin)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// No stack-walking in baseprofiler on linux, android, bsd.
// APIs now make it easier to capture backtraces from the Base Profiler, which
// is currently not supported on these platforms, and would lead to a MOZ_CRASH
// in REGISTERS_SYNC_POPULATE(). `#if 0` added in bug 1658232, follow-up bugs
// should be referenced in meta bug 1557568.
#if 0
// Android builds use the ARM Exception Handling ABI to unwind.
#  if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#    define HAVE_NATIVE_UNWIND
#    define USE_EHABI_STACKWALK
#    include "EHABIStackWalk.h"
#  endif

// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
      defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
      defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
      defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
      defined(GP_PLAT_arm64_freebsd)
#    define HAVE_NATIVE_UNWIND
#    define USE_LUL_STACKWALK
#    include "lul/LulMain.h"
#    include "lul/platform-linux-lul.h"

// On linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.).
//
// FramePointerStackWalk can produce incomplete stacks when the current entry is
// in a shared library without framepointers, however LUL can take a long time
// to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
#    if defined(MOZ_PROFILING)
#      define USE_FRAME_POINTER_STACK_WALK
#    endif
#  endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif
ProfileChunkedBuffer& profiler_get_core_buffer() {
  // The core buffer carries its own mutex: callers reach it both from
  // gPSMutex-guarded functions and from unguarded paths (e.g.,
  // profiler_add_marker). It is *not* touched inside the sampler's critical
  // section, where taking a mutex is forbidden.
  static ProfileChunkedBuffer sCoreBuffer{
      ProfileChunkedBuffer::ThreadSafety::WithMutex};
  return sCoreBuffer;
}
// Add all the possible features.
BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration.
ProfilerFeature::ClearJava(features);
ProfilerFeature::ClearJS(features);
ProfilerFeature::ClearScreenshots(features); #if !defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features); #endif #if !defined(GP_OS_windows)
ProfilerFeature::ClearNoTimerResolutionChange(features); #endif
return features;
}
// Default features common to all contexts (even if not available). static constexpr uint32_t DefaultFeatures() { return ProfilerFeature::Java | ProfilerFeature::JS |
ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
ProfilerFeature::ProcessCPU;
}
// Extra default features when MOZ_PROFILER_STARTUP is set (even if not // available). static constexpr uint32_t StartupExtraDefaultFeatures() { // Enable mainthreadio by default for startup profiles as startup is heavy on // I/O operations, and main thread I/O is really important to see there. return ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages;
}
// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS. // Use `PSAutoLock lock;` to take the lock until the end of the enclosing block. // External profilers may use this same lock for their own data, but as the lock // is non-recursive, *only* `f(PSLockRef, ...)` functions below should be // called, to avoid double-locking. class MOZ_RAII PSAutoLock { public:
PSAutoLock() : mLock(gPSMutex) {}
// All functions in this file can run on multiple threads unless they have an // NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is // valid even when the profiler is not active. Most profile operations can't do // anything useful when this class is not instantiated, so we release-assert // its non-nullness in all such operations. // // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a // PSAutoLock reference as an argument as proof that the gPSMutex is currently // locked. This makes it clear when gPSMutex is locked and helps avoid // accidental unlocked accesses to global state. There are ways to circumvent // this mechanism, but please don't do so without *very* good reason and a // detailed explanation. // // The exceptions to this rule: // // - mProcessStartTime, because it's immutable; // // - each thread's RacyRegisteredThread object is accessible without locking via // TLSRegisteredThread::RacyRegisteredThread(). class CorePS { private:
CorePS()
: mProcessStartTime(TimeStamp::ProcessCreation()) #ifdef USE_LUL_STACKWALK
,
mLul(nullptr) #endif
{
}
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex // being locked. This is because CorePS is instantiated so early on the main // thread that we don't have to worry about it being racy. staticbool Exists() { return !!sInstance; }
for (auto& registeredThread : sInstance->mRegisteredThreads) {
aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
}
for (auto& registeredPage : sInstance->mRegisteredPages) {
aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
}
// Measurement of the following things may be added later if DMD finds it // is worthwhile: // - CorePS::mRegisteredThreads itself (its elements' children are // measured above) // - CorePS::mRegisteredPages itself (its elements' children are // measured above) // - CorePS::mInterposeObserver
#ifdefined(USE_LUL_STACKWALK) if (sInstance->mLul) {
aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
} #endif
}
// No PSLockRef is needed for this field because it's immutable.
PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime)
auto foundPageIter = std::find_if(
sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
RegisteredPageComparator{aRegisteredPage.get()});
if (foundPageIter != sInstance->mRegisteredPages.end()) { if ((*foundPageIter)->Url() == "about:blank") { // When a BrowsingContext is loaded, the first url loaded in it will be // about:blank, and if the principal matches, the first document loaded // in it will share an inner window. That's why we should delete the // intermittent about:blank if they share the inner window.
sInstance->mRegisteredPages.erase(foundPageIter);
} else { // Do not register the same page again. return;
}
}
MOZ_RELEASE_ASSERT(
sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
}
staticvoid RemoveRegisteredPage(PSLockRef,
uint64_t aRegisteredInnerWindowID) {
MOZ_ASSERT(sInstance); // Remove RegisteredPage from mRegisteredPages by given inner window ID.
sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) { return rd->InnerWindowID() == aRegisteredInnerWindowID;
});
}
staticvoid AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
MOZ_ASSERT(sInstance); // we don't own the counter; they may be stored in static objects
MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
}
staticvoid RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { // we may be called to remove a counter after the profiler is stopped or // late in shutdown. if (sInstance) { auto* counter = std::find(sInstance->mCounters.begin(),
sInstance->mCounters.end(), aCounter);
MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
sInstance->mCounters.erase(counter);
}
}
private: // The singleton instance static CorePS* sInstance;
// The time that the process started. const TimeStamp mProcessStartTime;
// Info on all the registered threads. // ThreadIds in mRegisteredThreads are unique.
Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
// Info on all the registered pages. // InnerWindowIDs in mRegisteredPages are unique.
Vector<RefPtr<PageInformation>> mRegisteredPages;
// Non-owning pointers to all active counters
Vector<BaseProfilerCount*> mCounters;
#ifdef USE_LUL_STACKWALK // LUL's state. Null prior to the first activation, non-null thereafter.
UniquePtr<lul::LUL> mLul; #endif
// Process name, provided by child process initialization code.
std::string mProcessName; // Private name, provided by child process initialization code (eTLD+1 in // fission)
std::string mETLDplus1;
};
// The buffer size is provided as a number of "entries"; each entry occupies
// this many bytes.
static constexpr uint32_t scBytesPerEntry = 8;
// This class contains the profiler's global state that is valid only when the // profiler is active. When not instantiated, the profiler is inactive. // // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as // CorePS. // class ActivePS { private:
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
scBytesPerEntry / scMinimumNumberOfChunks,
size_t(scMaximumChunkSize)));
}
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { // Filter out any features unavailable in this platform/configuration.
aFeatures &= AvailableFeatures();
// Some features imply others. if (aFeatures & ProfilerFeature::FileIOAll) {
aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
} elseif (aFeatures & ProfilerFeature::FileIO) {
aFeatures |= ProfilerFeature::MainThreadIO;
}
return aFeatures;
}
ActivePS(PSLockRef aLock, const TimeStamp& aProfilingStartTime,
PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, constchar** aFilters, uint32_t aFilterCount, const Maybe<double>& aDuration)
: mProfilingStartTime(aProfilingStartTime),
mGeneration(sNextGeneration++),
mCapacity(aCapacity),
mDuration(aDuration),
mInterval(aInterval),
mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
mProfileBufferChunkManager(
MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
size_t(ClampToAllowedEntries(aCapacity.Value())) *
scBytesPerEntry,
ChunkSizeForEntries(aCapacity.Value()))),
mProfileBuffer([this]() -> ProfileChunkedBuffer& {
ProfileChunkedBuffer& buffer = profiler_get_core_buffer();
buffer.SetChunkManager(*mProfileBufferChunkManager); return buffer;
}()), // The new sampler thread doesn't start sampling immediately because the // main loop within Run() is blocked until this function's caller // unlocks gPSMutex.
mSamplerThread(
NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
mIsPaused(false),
mIsSamplingPaused(false) { // Deep copy and lower-case aFilters.
MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount)); for (uint32_t i = 0; i < aFilterCount; ++i) {
mFilters[i] = aFilters[i];
mFiltersLowered[i].reserve(mFilters[i].size());
std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
std::back_inserter(mFiltersLowered[i]), ::tolower);
}
}
~ActivePS() { if (mProfileBufferChunkManager) { // We still control the chunk manager, remove it from the core buffer.
profiler_get_core_buffer().ResetChunkManager();
}
}
bool ThreadSelected(constchar* aThreadName) { if (mFiltersLowered.empty()) { returntrue;
}
std::string name = aThreadName;
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
for (constauto& filter : mFiltersLowered) { if (filter == "*") { returntrue;
}
// Crude, non UTF-8 compatible, case insensitive substring search if (name.find(filter) != std::string::npos) { returntrue;
}
// If the filter is "pid:<my pid>", profile all threads. if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { returntrue;
}
}
n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it // is worthwhile: // - mLiveProfiledThreads (both the array itself, and the contents) // - mDeadProfiledThreads (both the array itself, and the contents) //
// Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs // for all threads that should be included in a profile, both for threads // that are still registered, and for threads that have been unregistered but // still have data in the buffer. // For threads that have already been unregistered, the RegisteredThread // pointer will be null. // The returned array is sorted by thread register time. // Do not hold on to the return value across thread registration or profiler // restarts. static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
ProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
MOZ_RELEASE_ASSERT(
array.initCapacity(sInstance->mLiveProfiledThreads.length() +
sInstance->mDeadProfiledThreads.length())); for (auto& t : sInstance->mLiveProfiledThreads) {
MOZ_RELEASE_ASSERT(array.append(
std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
} for (auto& t : sInstance->mDeadProfiledThreads) {
MOZ_RELEASE_ASSERT(
array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
}
std::sort(array.begin(), array.end(),
[](const std::pair<RegisteredThread*, ProfiledThreadData*>& a, const std::pair<RegisteredThread*, ProfiledThreadData*>& b) { return a.second->Info()->RegisterTime() <
b.second->Info()->RegisterTime();
}); return array;
}
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
Vector<RefPtr<PageInformation>> array; for (auto& d : CorePS::RegisteredPages(aLock)) {
MOZ_RELEASE_ASSERT(array.append(d));
} for (auto& d : sInstance->mDeadProfiledPages) {
MOZ_RELEASE_ASSERT(array.append(d));
} // We don't need to sort the pages like threads since we won't show them // as a list. return array;
}
// Do a linear search through mLiveProfiledThreads to find the // ProfiledThreadData object for a RegisteredThread. static ProfiledThreadData* GetProfiledThreadData(
PSLockRef, RegisteredThread* aRegisteredThread) {
MOZ_ASSERT(sInstance); for (const LiveProfiledThreadData& thread :
sInstance->mLiveProfiledThreads) { if (thread.mRegisteredThread == aRegisteredThread) { return thread.mProfiledThreadData.get();
}
} return nullptr;
}
// Find the right entry in the mLiveProfiledThreads array and remove the // element, moving the ProfiledThreadData object for the thread into the // mDeadProfiledThreads array. // The thread's RegisteredThread object gets destroyed here. for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; if (thread.mRegisteredThread == aRegisteredThread) {
thread.mProfiledThreadData->NotifyUnregistered(
sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
std::move(thread.mProfiledThreadData)));
sInstance->mLiveProfiledThreads.erase(
&sInstance->mLiveProfiledThreads[i]); return;
}
}
}
PS_GET_AND_SET(bool, IsPaused)
// True if sampling is paused (though generic `SetIsPaused()` or specific // `SetIsSamplingPaused()`). staticbool IsSamplingPaused(PSLockRef lock) {
MOZ_ASSERT(sInstance); return IsPaused(lock) || sInstance->mIsSamplingPaused;
}
private: // The singleton instance. static ActivePS* sInstance;
const TimeStamp mProfilingStartTime;
// We need to track activity generations. If we didn't we could have the // following scenario. // // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks // gPSMutex, deletes the SamplerThread (which does a join). // // - profiler_start() runs on a different thread, locks gPSMutex, // re-instantiates ActivePS, unlocks gPSMutex -- all before the join // completes. // // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, // and continues as if the start/stop pair didn't occur. Also // profiler_stop() is stuck, unable to finish. // // By checking ActivePS *and* the generation, we can avoid this scenario. // sNextGeneration is used to track the next generation number; it is static // because it must persist across different ActivePS instantiations. const uint32_t mGeneration; static uint32_t sNextGeneration;
// The maximum number of 8-byte entries in mProfileBuffer. const PowerOfTwo32 mCapacity;
// The maximum duration of entries in mProfileBuffer, in seconds. const Maybe<double> mDuration;
// The interval between samples, measured in milliseconds. constdouble mInterval;
// The profile features that are enabled. const uint32_t mFeatures;
// Substrings of names of threads we want to profile.
Vector<std::string> mFilters;
Vector<std::string> mFiltersLowered;
// The chunk manager used by `mProfileBuffer` below. // May become null if it gets transferred to the Gecko Profiler.
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager;
// The buffer into which all samples are recorded.
ProfileBuffer mProfileBuffer;
// ProfiledThreadData objects for any threads that were profiled at any point // during this run of the profiler: // - mLiveProfiledThreads contains all threads that are still registered, and // - mDeadProfiledThreads contains all threads that have already been // unregistered but for which there is still data in the profile buffer.
Vector<LiveProfiledThreadData> mLiveProfiledThreads;
Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
// Info on all the dead pages. // Registered pages are being moved to this array after unregistration. // We are keeping them in case we need them in the profile data. // We are removing them when we ensure that we won't need them anymore.
Vector<RefPtr<PageInformation>> mDeadProfiledPages;
// The current sampler thread. This class is not responsible for destroying // the SamplerThread object; the Destroy() method returns it so the caller // can destroy it.
SamplerThread* const mSamplerThread;
// Is the profiler fully paused? bool mIsPaused;
// Is the profiler periodic sampling paused? bool mIsSamplingPaused;
/* static */ bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
uint32_t af = sActiveAndFeatures; // copy it first return (af & Active) && (af & aFeature);
}
/* static */ bool RacyFeatures::IsActiveWithoutFeature(uint32_t aFeature) {
uint32_t af = sActiveAndFeatures; // copy it first return (af & Active) && !(af & aFeature);
}
/* static */ bool RacyFeatures::IsActiveAndUnpaused() {
uint32_t af = sActiveAndFeatures; // copy it first return (af & Active) && !(af & Paused);
}
/* static */ bool RacyFeatures::IsActiveAndSamplingUnpaused() {
uint32_t af = sActiveAndFeatures; // copy it first return (af & Active) && !(af & (Paused | SamplingPaused));
}
// Each live thread has a RegisteredThread, and we store a reference to it in // TLS. This class encapsulates that TLS. class TLSRegisteredThread { public: staticbool Init(PSLockRef) { bool ok1 = sRegisteredThread.init(); bool ok2 = AutoProfilerLabel::sProfilingStack.init(); return ok1 && ok2;
}
// Get the entire RegisteredThread. Accesses are guarded by gPSMutex. staticclass RegisteredThread* RegisteredThread(PSLockRef) { return sRegisteredThread.get();
}
// Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex. staticclass RacyRegisteredThread* RacyRegisteredThread() { class RegisteredThread* registeredThread = sRegisteredThread.get(); return registeredThread ? ®isteredThread->RacyRegisteredThread()
: nullptr;
}
// Get only the ProfilingStack. Accesses are not guarded by gPSMutex. // RacyRegisteredThread() can also be used to get the ProfilingStack, but that // is marginally slower because it requires an extra pointer indirection. static ProfilingStack* Stack() { return AutoProfilerLabel::sProfilingStack.get();
}
private: // This is a non-owning reference to the RegisteredThread; // CorePS::mRegisteredThreads is the owning reference. On thread // deregistration, this reference is cleared and the RegisteredThread is // destroyed. static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};
// Although you can access a thread's ProfilingStack via // TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer // directly to the ProfilingStack. Here's why. // // - We need to be able to push to and pop from the ProfilingStack in // AutoProfilerLabel. // // - The class functions are hot and must be defined in BaseProfiler.h so they // can be inlined. // // - We don't want to expose TLSRegisteredThread (and RegisteredThread) in // BaseProfiler.h. // // This second pointer isn't ideal, but does provide a way to satisfy those // constraints. TLSRegisteredThread is responsible for updating it.
// Out-of-class definition of the thread-local declared in AutoProfilerLabel.
MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;
// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";
//////////////////////////////////////////////////////////////////////// // BEGIN sampling/unwinding code
// Additional registers that have to be saved when thread is paused.
#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \
    defined(GP_ARCH_x86)
#  define UNWINDING_REGS_HAVE_ECX_EDX
#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
    defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) ||         \
    defined(__x86_64__)
#  define UNWINDING_REGS_HAVE_R10_R12
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#  define UNWINDING_REGS_HAVE_LR_R7
#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
    defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) ||         \
    defined(__aarch64__)
#  define UNWINDING_REGS_HAVE_LR_R11
#endif
// The registers used for stack unwinding and a few other sampling purposes. // The ctor does nothing; users are responsible for filling in the fields. class Registers { public:
Registers()
: mPC{nullptr},
mSP{nullptr},
mFP{nullptr} #ifdefined(UNWINDING_REGS_HAVE_ECX_EDX)
,
mEcx{nullptr},
mEdx{nullptr} #elifdefined(UNWINDING_REGS_HAVE_R10_R12)
,
mR10{nullptr},
mR12{nullptr} #elifdefined(UNWINDING_REGS_HAVE_LR_R7)
,
mLR{nullptr},
mR7{nullptr} #elifdefined(UNWINDING_REGS_HAVE_LR_R11)
,
mLR{nullptr},
mR11{nullptr} #endif
{
}
void Clear() { memset(this, 0, sizeof(*this)); }
// These fields are filled in by // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples.
Address mPC; // Instruction pointer.
Address mSP; // Stack pointer.
Address mFP; // Frame pointer. #ifdefined(UNWINDING_REGS_HAVE_ECX_EDX)
Address mEcx; // Temp for return address.
Address mEdx; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_R10_R12)
Address mR10; // Temp for return address.
Address mR12; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_LR_R7)
Address mLR; // ARM link register, or temp for return address.
Address mR7; // Temp for frame pointer. #elifdefined(UNWINDING_REGS_HAVE_LR_R11)
Address mLR; // ARM link register, or temp for return address.
Address mR11; // Temp for frame pointer. #endif
#ifdefined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) // This contains all the registers, which means it duplicates the four fields // above. This is ok.
ucontext_t* mContext; // The context from the signal handler. #endif
};
// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;

// A fixed-capacity native stack trace: parallel arrays of program counters and
// stack pointers, plus the number of frames actually filled in.
struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES];
  void* mSPs[MAX_NATIVE_FRAMES];
  size_t mCount;  // Number of frames filled.

  // Value-initializes both arrays (all null) and starts with zero frames.
  NativeStack() : mPCs(), mSPs(), mCount(0) {}
};
// Merges the profiling stack and native stack, outputting the details to // aCollector. staticvoid MergeStacks(bool aIsSynchronous, const RegisteredThread& aRegisteredThread, const NativeStack& aNativeStack,
ProfilerStackCollector& aCollector) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
Maybe<uint64_t> samplePosInBuffer; if (!aIsSynchronous) { // aCollector.SamplePositionInBuffer() will return Nothing() when // profiler_suspend_and_sample_thread is called from the background hang // reporter.
samplePosInBuffer = aCollector.SamplePositionInBuffer();
} // While the profiling stack array is ordered oldest-to-youngest, the JS and // native arrays are ordered youngest-to-oldest. We must add frames to aInfo // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS // and native arrays backwards. Note: this means the terminating condition // jsIndex and nativeIndex is being < 0.
uint32_t profilingStackIndex = 0;
int32_t nativeIndex = aNativeStack.mCount - 1;
uint8_t* lastLabelFrameStackAddr = nullptr;
// Iterate as long as there is at least one frame remaining. while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) { // There are 1 to 3 frames available. Find and add the oldest.
uint8_t* profilingStackAddr = nullptr;
uint8_t* nativeStackAddr = nullptr;
if (profilingStackIndex != profilingStackFrameCount) { const ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
if (profilingStackFrame.isLabelFrame() ||
profilingStackFrame.isSpMarkerFrame()) {
lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
}
// Skip any JS_OSR frames. Such frames are used when the JS interpreter // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). // To avoid both the profiling stack frame and jit frame being recorded // (and showing up twice), the interpreter marks the interpreter // profiling stack frame as JS_OSR to ensure that it doesn't get counted. if (profilingStackFrame.isOSRFrame()) {
profilingStackIndex++; continue;
}
if (nativeIndex >= 0) {
nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
}
// If there's a native stack frame which has the same SP as a profiling // stack frame, pretend we didn't see the native stack frame. Ditto for a // native stack frame which has the same SP as a JS stack frame. In effect // this means profiling stack frames or JS frames trump conflicting native // frames. if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
nativeStackAddr = nullptr;
nativeIndex--;
MOZ_ASSERT(profilingStackAddr);
}
// Check to see if profiling stack frame is top-most. if (profilingStackAddr > nativeStackAddr) {
MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); const ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
// Sp marker frames are just annotations and should not be recorded in // the profile. if (!profilingStackFrame.isSpMarkerFrame()) { if (aIsSynchronous && profilingStackFrame.categoryPair() ==
ProfilingCategoryPair::PROFILER) { // For stacks captured synchronously (ie. marker stacks), stop // walking the stack as soon as we enter the profiler category, // to avoid showing profiler internal code in marker stacks. return;
}
aCollector.CollectProfilingStackFrame(profilingStackFrame);
}
profilingStackIndex++; continue;
}
// If we reach here, there must be a native stack frame and it must be the // greatest frame. if (nativeStackAddr) {
MOZ_ASSERT(nativeIndex >= 0); void* addr = (void*)aNativeStack.mPCs[nativeIndex];
aCollector.CollectNativeLeafAddr(addr);
} if (nativeIndex >= 0) {
nativeIndex--;
}
}
}
#ifdefined(USE_FRAME_POINTER_STACK_WALK) staticvoid DoFramePointerBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread, const Registers& aRegs,
NativeStack& aNativeStack) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// Start with the current function. We use 0 as the frame number here because // the FramePointerStackWalk() call below will use 1..N. This is a bit weird // but it doesn't matter because StackWalkCallback() doesn't use the frame // number argument.
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
#ifdefined(USE_MOZ_STACK_WALK) staticvoid DoMozStackWalkBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread, const Registers& aRegs,
NativeStack& aNativeStack) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// Start with the current function. We use 0 as the frame number here because // the MozStackWalkThread() call below will use 1..N. This is a bit weird but // it doesn't matter because StackWalkCallback() doesn't use the frame number // argument.
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
#ifdef USE_EHABI_STACKWALK staticvoid DoEHABIBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread, const Registers& aRegs,
NativeStack& aNativeStack) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
// See the comment at the callsite for why this function is necessary.
#  if defined(MOZ_HAVE_ASAN_IGNORE)
// A memcpy() replacement that is NOT instrumented by ASAN, used to copy a
// suspended thread's stack bytes. Copying another thread's stack looks like a
// stack-buffer underflow to ASAN, so the copy must bypass instrumentation.
//
// @param aDst destination buffer (must hold at least aLen bytes)
// @param aSrc source bytes (a raw snapshot of the sampled thread's stack)
// @param aLen number of bytes to copy
MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc,
                                        size_t aLen) {
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);

  for (size_t i = 0; i < aLen; i++) {
    dst[i] = src[i];
  }
}
#  endif
staticvoid DoLULBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread, const Registers& aRegs, NativeStack& aNativeStack) { // WARNING: this function runs within the profiler's "critical section". // WARNING: this function might be called while the profiler is inactive, and // cannot rely on ActivePS.
const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
// Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the // stack's registered top point. Do some basic sanity checks too. This // assumes that the TaggedUWord holding the stack pointer value is valid, but // it should be, since it was constructed that way in the code just above.
// We could construct |stackImg| so that LUL reads directly from the stack in // question, rather than from a copy of it. That would reduce overhead and // space use a bit. However, it gives a problem with dynamic analysis tools // (ASan, TSan, Valgrind) which is that such tools will report invalid or // racing memory accesses, and such accesses will be reported deep inside LUL. // By taking a copy here, we can either sanitise the copy (for Valgrind) or // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have // to try and suppress errors inside LUL. // // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks // observed in some minutes of testing, whilst keeping the size of this // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in // practice are small, 4KB or less, and so the copy costs are insignificant // compared to other profiler overhead. // // |stackImg| is allocated on this (the sampling thread's) stack. That // implies that the frame for this function is at least N_STACK_BYTES large. // In general it would be considered unacceptable to have such a large frame // on a stack, but it only exists for the unwinder thread, and so is not // expected to be a problem. Allocating it on the heap is troublesome because // this function runs whilst the sampled thread is suspended, so any heap // allocation risks deadlock. Allocating it as a global variable is not // thread safe, which would be a problem if we ever allow multiple sampler // threads. Hence allocating it on the stack seems to be the least-worst // option.
lul::StackImage stackImg;
{ # ifdefined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ defined(GP_PLAT_amd64_freebsd)
uintptr_t rEDZONE_SIZE = 128;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ defined(GP_PLAT_arm64_freebsd)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; # elif defined(GP_PLAT_mips64_linux)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; # else # error "Unknown plat" # endif
uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
uintptr_t ws = sizeof(void*);
start &= ~(ws - 1);
end &= ~(ws - 1);
uintptr_t nToCopy = 0; if (start < end) {
nToCopy = end - start; if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
}
MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
stackImg.mLen = nToCopy;
stackImg.mStartAvma = start; if (nToCopy > 0) { // If this is a vanilla memcpy(), ASAN makes the following complaint: // // ERROR: AddressSanitizer: stack-buffer-underflow ... // ... // HINT: this may be a false positive if your program uses some custom // stack unwind mechanism or swapcontext // // This code is very much a custom stack unwind mechanism! So we use an // alternative memcpy() implementation that is ignored by ASAN. # ifdefined(MOZ_HAVE_ASAN_IGNORE)
ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); # else
memcpy(&stackImg.mContents[0], (void*)start, nToCopy); # endif
(void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
}
}
// Update stats in the LUL stats object. Unfortunately this requires // three global memory operations.
lul->mStats.mContext += 1;
lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
lul->mStats.mFP += framePointerFramesAcquired;
}
#endif
#ifdef HAVE_NATIVE_UNWIND
// Dispatches to the native stack walker selected at compile time, filling
// |aNativeStack| from the sampled thread's registers in |aRegs|.
//
// This method determines which stackwalker is used for periodic and
// synchronous samples. (Backtrace samples are treated differently, see
// profiler_suspend_and_sample_thread() for details). The only part of the
// ordering that matters is that LUL must precede FRAME_POINTER, because on
// Linux they can both be present.
static void DoNativeBacktrace(PSLockRef aLock,
                              const RegisteredThread& aRegisteredThread,
                              const Registers& aRegs,
                              NativeStack& aNativeStack) {
#  if defined(USE_LUL_STACKWALK)
  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#  elif defined(USE_EHABI_STACKWALK)
  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#  elif defined(USE_FRAME_POINTER_STACK_WALK)
  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#  elif defined(USE_MOZ_STACK_WALK)
  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#  else
#    error "Invalid configuration"
#  endif
}
#endif
// NOTE(review): the middle of DoSharedSample's body (the code that writes the
// ThreadId/Time entries and collects the stack between the assertion and the
// NativeLeafAddr fallback) is missing from this excerpt, and the trailing
// extra "}" below is residue of a scope opened in that lost code. Lines are
// also whitespace-mangled ("staticinlinevoid" is a fused
// "static inline void"). Restore from the upstream source.
// Writes some components shared by periodic and synchronous profiles to // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample() // and DoPeriodicSample().) // // The grammar for entry sequences is in a comment above // ProfileBuffer::StreamSamplesToJSON. staticinlinevoid DoSharedSample(
PSLockRef aLock, bool aIsSynchronous, RegisteredThread& aRegisteredThread, const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) { // WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(!aBuffer.IsThreadSafe(), "Mutexes cannot be used inside this critical section");
// We can't walk the whole native stack, but we can record the top frame. if (aCaptureOptions == StackCaptureOptions::Full) {
aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
}
}
}
// NOTE(review): only DoSyncSample's signature and its leading assertion
// survive in this excerpt ("staticvoid" is a fused "static void"); the rest
// of the body — presumably the DoSharedSample() call — and the closing brace
// are missing. Restore from the upstream source.
// Writes the components of a synchronous sample to the given ProfileBuffer. staticvoid DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread, const TimeStamp& aNow, const Registers& aRegs,
ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions) { // WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack, "DoSyncSample should not be called when no capture is needed");
// NOTE(review): DoPeriodicSample is cut off after its opening brace; the body
// and closing brace were lost during extraction (the following lines in the
// file are unrelated boilerplate text, not source code). "staticvoid" is a
// fused "static void". Restore from the upstream source.
// Writes the components of a periodic sample to ActivePS's ProfileBuffer. // The ThreadId entry is already written in the main ProfileBuffer, its location // is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). staticvoid DoPeriodicSample(PSLockRef aLock,
RegisteredThread& aRegisteredThread,
ProfiledThreadData& aProfiledThreadData, const Registers& aRegs, uint64_t aSamplePos,
uint64_t aBufferRangeStart,
ProfileBuffer& aBuffer) { // WARNING: this function runs within the profiler's "critical section".
// NOTE(review): the remainder of DoPeriodicSample() and everything after it
// was lost during extraction and had been replaced here by unrelated
// boilerplate text (a German website disclaimer, not source code). Restore
// the missing code from the upstream Gecko profiler source.