added everything
This commit is contained in:
557
engine/platform/profiler.h
Executable file
557
engine/platform/profiler.h
Executable file
@@ -0,0 +1,557 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// Torque Game Engine
|
||||
// Copyright (C) GarageGames.com, Inc.
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#ifndef _PROFILER_H_
|
||||
#define _PROFILER_H_
|
||||
|
||||
#include "core/torqueConfig.h"
|
||||
|
||||
#ifdef TORQUE_ENABLE_PROFILER
|
||||
|
||||
// First, define our macros; that way if we use the profiler in any
|
||||
// of the includes we depend on, we'll have them available.
|
||||
#define PROFILE_START(name) \
|
||||
static ProfilerRoot pdata##name##obj (#name); \
|
||||
if(gProfiler) gProfiler->hashPush(& pdata##name##obj )
|
||||
|
||||
#define PROFILE_END() if(gProfiler) gProfiler->hashPop()
|
||||
|
||||
#else
|
||||
|
||||
// If profiler is disabled then just stub the blocks out.
|
||||
#define PROFILE_START(x)
|
||||
#define PROFILE_END()
|
||||
|
||||
#endif
|
||||
|
||||
// Then include our includes and the real profiler code that needs them.
|
||||
#ifdef TORQUE_ENABLE_PROFILER
|
||||
|
||||
#include "platform/platform.h"
|
||||
#include "core/stringTable.h"
|
||||
#include "console/console.h"
|
||||
#include "core/tVector.h"
|
||||
#include "core/fileStream.h"
|
||||
#include "platform/platformThread.h"
|
||||
|
||||
struct ProfilerData;
|
||||
struct ProfilerRoot;
|
||||
struct ProfilerRootData;
|
||||
struct ProfilerInstance;
|
||||
|
||||
/// Torque includes a full-featured code profiler, which can be used to
|
||||
/// measure the performance of game code.
|
||||
///
|
||||
/// The profiler tracks performance information about a number of blocks in
|
||||
/// the code base defined by matched sets of PROFILE_START() and
|
||||
/// PROFILER_END() blocks. Basically, you can use it to see how much time
|
||||
/// a chunk of code is taking out of your overall execution time.
|
||||
///
|
||||
/// The information tracked includes:
|
||||
/// * Percent of time spent in just this block.
|
||||
/// * Percent of time spent in this block as well as any other blocks
|
||||
/// encountered inside of it.
|
||||
/// * Number of invocations, ie, how many times this block was run.
|
||||
///
|
||||
/// The profiler tracks both global statistics for each block, as well as
|
||||
/// providing hierarchical data about performance (ie, what blocks are
|
||||
/// called from inside what other blocks, and what time is spent where).
|
||||
///
|
||||
/// While this information is not as fine grained as that provided by
|
||||
/// serious, low-level profiling tools (ie, gprof, VTune, Shark), the
|
||||
/// overhead is lower. The profiler can be enabled in beta or debug builds
|
||||
/// of your application, and used to easily dump information about low
|
||||
/// performance situations. Because it is Torque-aware, it also gives more
|
||||
/// useful information in some cases than lower level profilers can provide.
|
||||
///
|
||||
/// For instance, it can easily show when and how frequently you are
|
||||
/// allocating/freeing memory, or what the relative breakdown in time is
|
||||
/// between player update code and physics for other items. It can give
|
||||
/// useful information for trying to decide if your game is CPU or GPU
|
||||
/// bound, or if code is being erroneously run too much. (Seeing a helper
|
||||
/// function is getting run several dozen thousand times more than its
|
||||
/// parent can be one tip-off. ;)
|
||||
///
|
||||
/// <b>How To Use The Profiler</b>
|
||||
///
|
||||
/// First, you must #define TORQUE_ENABLE_PROFILER in torqueConfig.h in
|
||||
/// order to active the profiler. Then you can use it from script. Examples
|
||||
/// of script use:
|
||||
///
|
||||
/// @code
|
||||
/// // profilerEnable enables or disables profiling. Data is only gathered
|
||||
/// // when the profiler is enabled.
|
||||
/// profilerEnable(bool enable);
|
||||
/// profilerReset(); //resets the data gathered by the profiler
|
||||
/// profilerDump(); //dumps all profiler data to the console
|
||||
/// profilerDumpToFile(string filename); //dumps all profiler data to a given file
|
||||
/// @endcode
|
||||
///
|
||||
/// The C++ code side of the profiler uses pairs of PROFILE_START() and
|
||||
/// PROFILE_END().
|
||||
///
|
||||
/// When using these macros, make sure there is a PROFILE_END() for every
|
||||
/// PROFILE_START and a PROFILE_START() for every PROFILE_END(). It is fine
|
||||
/// to nest these macros, however, you must make sure that no matter what
|
||||
/// execution path the code takes, the PROFILE macros will be balanced. (This
|
||||
/// usually means having a PROFILE_END() before every return statement in a
|
||||
/// function, for instance.)
|
||||
///
|
||||
/// The profiler can be used to locate areas of code that are slow or should
|
||||
/// be considered for optimization. Since it tracks the relative time of
|
||||
/// execution of that code to the execution of the main loop, it is possible
|
||||
/// to benchmark any given code to see if changes made will actually
|
||||
/// improve performance.
|
||||
///
|
||||
/// Here are some examples:
|
||||
/// @code
|
||||
/// PROFILE_START(TerrainRender);
|
||||
/// //some code here
|
||||
/// PROFILE_START(TerrainRenderGridSquare);
|
||||
/// //some code here
|
||||
/// PROFILE_END();
|
||||
/// //possibly some code here
|
||||
/// PROFILE_END();
|
||||
///
|
||||
/// void foo()
|
||||
/// {
|
||||
/// PROFILE_START(foo_do);
|
||||
/// if(doThing())
|
||||
/// {
|
||||
/// PROFILE_END();
|
||||
/// return true;
|
||||
/// }
|
||||
/// PROFILE_END();
|
||||
/// return false;
|
||||
/// }
|
||||
/// @endcode
|
||||
///
|
||||
/// <b>What Data the Profiler Gathers</b>
|
||||
///
|
||||
/// The profiler gathers two general representations of the profiling data.
|
||||
///
|
||||
/// First, it gathers per block data. This is data that relates to every
|
||||
/// single execution of a given block - for instance, foo_do in the
|
||||
/// above example code has its invocation count and total and non-sub time
|
||||
/// tracked regardless of where it is called from. This is used to generate
|
||||
/// the first half of the profiler data dump, the sorted list of what blocks
|
||||
/// are, overall, taking up the most non-sub time.
|
||||
///
|
||||
/// Second, data is gathered per instance of a block. This is the data used
|
||||
/// to display the nice formatted tree that makes up the second half of a
|
||||
/// profiler data dump. A single block may have many instances as it is
|
||||
/// called from different parts of the codebase. For instance, foo_do might
|
||||
/// get called from the main loop, as well as from the console, making it
|
||||
/// show up in several places on the hierarchical view:
|
||||
///
|
||||
/// @code
|
||||
/// MainLoop
|
||||
/// foo_do
|
||||
/// ConsoleCode
|
||||
/// foo_do
|
||||
/// @endcode
|
||||
///
|
||||
/// There are three values gathered per block:
|
||||
/// - Total time. Shown as a percentage of the time period that was profiled,
|
||||
/// this is the time spent in both this block and all its children.
|
||||
/// - Non-sub time. This is the amount of time spent ONLY in this block,
|
||||
/// and not any of its children.
|
||||
/// - Invocations. The number of times this block was executed.
|
||||
///
|
||||
/// <b>How the Profiler Works</b>
|
||||
///
|
||||
/// Internally, all profiler state is managed by ProfilerInstance. The
|
||||
/// gProfiler is just a wrapper around this, responsible for dispatching
|
||||
/// profiler events (ie, PROFILER_START and PROFILE_END macros, which
|
||||
/// correspond to calls to Profiler::hashPush and Profiler::hashPop
|
||||
/// respectively) to the appropriate ProfilerInstance. There is one
|
||||
/// ProfilerInstance per thread, representing the entire profiler state
|
||||
/// for that thread.
|
||||
///
|
||||
/// ProfilerRoots are allocated statically in each PROFILER_START macro, and
|
||||
/// are used as "anchors" for a table of ProfilerRootDatas contained in each
|
||||
/// ProfilerInstance. Each ProfilerRoot is assigned an ID which is used for
|
||||
/// fast lookups in the ProfilerInstance's ProfilerRootData table. There are
|
||||
/// potentially many ProfilerRootDatas for a given ProfilerRoot, but only one
|
||||
/// and exactly one PRD for a PR in a given ProfilerInstance.
|
||||
///
|
||||
/// Together, a PR and a PRD represent a "block", i.e., a specific chunk
|
||||
/// of code begun with a PROFILE_START() and ended with one or more
|
||||
/// PROFILE_END()s. This is all that is needed to gather the per-block
|
||||
/// data mentioned in the previous section.
|
||||
///
|
||||
/// However, context is often important in analysis, so we also gather
|
||||
/// hierarchical data, tracking what blocks are called from which other
|
||||
/// blocks. This allows us to know that, for instance, we only allocate
|
||||
/// memory once out of every hundred frames in the terrain inner loops,
|
||||
/// while executing a single console function can involve several frees
|
||||
/// and allocs. (That is to say, TerrainRender is invoked hundreds of times,
|
||||
/// while MemoryAlloc and MemoryFree only show up under it once or twice,
|
||||
/// and ConsoleExec shows up once or twice with MemoryAlloc and MemoryFree
|
||||
/// being invoked a dozen times each.)
|
||||
///
|
||||
/// To acheive this, we make use of the ProfilerData structure. For each
|
||||
/// "instance" of a block we encounter, we allocate a ProfilerData and place
|
||||
/// it in a tree structure. A given ProfilerRootData in a ProfilerInstance
|
||||
/// might have many ProfilerDatas, representing different situations under
|
||||
/// which it is executed, ie, different blocks that call it.
|
||||
///
|
||||
/// So we build a list, and grow a tree from it. By tracking our position
|
||||
/// in the tree, we can easily build up a consistent view of how our code
|
||||
/// blocks are called, and how we spend our time in different calling
|
||||
/// hierarchies.
|
||||
class Profiler
|
||||
{
|
||||
friend class ProfilerInstance;
|
||||
|
||||
ProfilerInstance *mInstanceListHead;
|
||||
ThreadStorage mCurrentInstance;
|
||||
|
||||
/// Get the current ProfilerInstance for the executing thread, or
|
||||
/// allocate it if it is unavailable.
|
||||
ProfilerInstance *getCurrentInstance();
|
||||
|
||||
/// We only want one thing to dump at a time - imagine the madness
|
||||
/// of interleaved output - so we stick a mutex here to manage that, and
|
||||
/// some other state information, as it's easy to get to from wherever
|
||||
/// we've got the lock.
|
||||
void *mDumpMutex;
|
||||
|
||||
bool mCurrentDumpIsConsole;
|
||||
bool mCurrentDumpIsFile;
|
||||
Stream *mCurrentDumpStream;
|
||||
|
||||
public:
|
||||
Profiler();
|
||||
~Profiler();
|
||||
|
||||
/// Get a list of the IDs of all the threads we're profiling. Pass these
|
||||
/// IDs into the function variants that require an id parameter in order
|
||||
/// to control a specific thread's profiling activity.
|
||||
void getThreadList(Vector<U32> &ids);
|
||||
|
||||
ProfilerInstance *getInstanceByID(const U32 id);
|
||||
|
||||
void enable(bool enable);
|
||||
void dumpToConsole();
|
||||
void dumpToFile(const char *fileName);
|
||||
void enableMarker(const char *marker, bool enable);
|
||||
void reset();
|
||||
|
||||
void hashPush(ProfilerRoot *pr);
|
||||
void hashPop();
|
||||
};
|
||||
|
||||
extern Profiler *gProfiler;
|
||||
|
||||
/// Helper structure used to store general information about a profiler
|
||||
/// block.
|
||||
///
|
||||
/// Everything that follows are implementation details.
|
||||
///
|
||||
/// @see Profiler for information on how to use the profiler.
|
||||
///
|
||||
/// We use a slightly indirected scheme to manage the complexities of
|
||||
/// profiling threads. Basically, the ProfilerRoot structures store the
|
||||
/// name and other constant, global information about a given profiler
|
||||
/// block. We walk the list at startup to generate a list at startup
|
||||
/// which is then used to lookup into a thread-specific table of
|
||||
/// ProfilerRootDatas. This way we can easily track profile information
|
||||
/// for each thread without duplication of information or trying to make
|
||||
/// the core profiling data structures thread-safe via synchronization
|
||||
/// primitives, which can be horribly slow.
|
||||
///
|
||||
/// Profilers must be fast and deterministic, so that they don't impact
|
||||
/// what they measure. :)
|
||||
struct ProfilerRoot
|
||||
{
|
||||
/// Name of this profile block.
|
||||
const char *mName;
|
||||
|
||||
/// Hash of the profile block name, for fast lookups.
|
||||
U32 mNameHash;
|
||||
|
||||
/// Next block in the master list.
|
||||
ProfilerRoot *mNextRoot;
|
||||
|
||||
/// Assigned sequentially to map to the thread table.
|
||||
/// This allows us to quickly lookup into it. (A hash
|
||||
/// would be slow.)
|
||||
U32 mID;
|
||||
|
||||
// This structure is exactly 16 bytes. (4 words at 4 bytes each)
|
||||
// Following are non-data members:
|
||||
|
||||
/// Head of master list of all profiler blocks.
|
||||
static ProfilerRoot *smRootList;
|
||||
|
||||
/// Global count of roots.
|
||||
static U32 smRootCount;
|
||||
|
||||
/// Constructor - called by the PROFILE_START macro.
|
||||
ProfilerRoot(const char *name);
|
||||
};
|
||||
|
||||
/// Profiler thread table data element. One of these is created for every
|
||||
/// ProfilerRoot, to store thread specific data. They are indexed by
|
||||
/// ProfilerRoot::mID.
|
||||
///
|
||||
/// @see Profiler for information on how to use the profiler.
|
||||
struct ProfilerRootData
|
||||
{
|
||||
ProfilerRootData();
|
||||
~ProfilerRootData();
|
||||
|
||||
/// We keep a pointer back to the original root structure so we can do
|
||||
/// name lookups and the like.
|
||||
ProfilerRoot *mRoot;
|
||||
|
||||
/// Head of linked list of actual ProfilerDatas for this
|
||||
/// block.
|
||||
ProfilerData *mFirstProfilerData;
|
||||
|
||||
/// Should profile information for this block be gathered?
|
||||
bool mEnabled;
|
||||
|
||||
/// @name Data Accumulators
|
||||
///
|
||||
///
|
||||
/// @{
|
||||
|
||||
/// Total time spent in all instances of this block.
|
||||
F64 mTotalTime;
|
||||
|
||||
/// Total time spent in all child instances of this block, excluding
|
||||
/// this block.
|
||||
F64 mSubTime;
|
||||
|
||||
/// Total number of times all instances of this block have been invoked.
|
||||
U32 mTotalInvokeCount;
|
||||
|
||||
/// @}
|
||||
|
||||
/// Reset all the data accumulators.
|
||||
void reset();
|
||||
};
|
||||
|
||||
/// Runtime information on a given instance of a profiler block.
|
||||
///
|
||||
/// @see Profiler for information on how to use the profiler.
|
||||
///
|
||||
/// Generated on the fly as we build our profiler tree, this represents
|
||||
/// profiled instances of a block. This enables us to accurately track
|
||||
/// recursive situations and the like.
|
||||
///
|
||||
/// It is organized in the following lists:
|
||||
/// - Root list. Each root has a list of all its instances associated
|
||||
/// with it.
|
||||
/// - ProfilerData list. Each thread keeps a master list of all the
|
||||
/// ProfilerDatas associated with it.
|
||||
/// - Hash list. Children of a ProfilerData are kept in a hash indexed
|
||||
/// by name. Thus a bin list is maintained for each bin of the hash.
|
||||
/// - Child list. The children of a ProfilerData are all kept in a
|
||||
/// list for easy traversal.
|
||||
///
|
||||
struct ProfilerData
|
||||
{
|
||||
ProfilerData();
|
||||
~ProfilerData();
|
||||
|
||||
/// What ProfilerRootData do we belong to?
|
||||
ProfilerRootData *mOwningRoot;
|
||||
|
||||
/// Next ProfilerData for this root.
|
||||
ProfilerData *mNextForRoot;
|
||||
|
||||
/// Master list for the profiled thread.
|
||||
ProfilerData *mNextProfilerData;
|
||||
|
||||
/// Next ProfilerData in the hash bin.
|
||||
ProfilerData *mNextHash;
|
||||
|
||||
/// Who is our parent?
|
||||
ProfilerData *mParent;
|
||||
|
||||
/// Next sibling of our parent.
|
||||
ProfilerData *mNextSibling;
|
||||
|
||||
/// Our first child.
|
||||
ProfilerData *mFirstChild;
|
||||
|
||||
enum {
|
||||
HashTableSize = 32,
|
||||
};
|
||||
ProfilerData *mChildHash[HashTableSize];
|
||||
|
||||
/// For fast lookups, we cache the last seen child. If we see the same
|
||||
/// profiler block many times in a row this can save us a lot of
|
||||
/// traversal in Profiler::hashPush.
|
||||
ProfilerData *mLastSeenProfiler;
|
||||
|
||||
/// The hash of this block's names, kept here for fast lookups.
|
||||
U32 mHash;
|
||||
|
||||
/// Number of times we have recursed into this block from itself.
|
||||
U32 mSubDepth;
|
||||
|
||||
/// Number of times this block instance has been run.
|
||||
U32 mInvokeCount;
|
||||
|
||||
/// The high performance timer needs some memory to store its start time.
|
||||
U32 mStartTime[2];
|
||||
|
||||
/// Total time we've spent in this block instance.
|
||||
F64 mTotalTime;
|
||||
|
||||
/// Total time we've spent in this block instance's children, excluding
|
||||
/// this instance itself.
|
||||
F64 mSubTime;
|
||||
};
|
||||
|
||||
/// Manages thread-local profiler state.
|
||||
///
|
||||
/// @see Profiler for information on how to use the profiler.
|
||||
///
|
||||
/// This is the meat of the profiler; there are one of these for each
|
||||
/// profiler and they are responsible for tracking all of the state for a
|
||||
/// thread's profile information.
|
||||
class ProfilerInstance
|
||||
{
|
||||
friend class Profiler;
|
||||
|
||||
/// @name Dump Code
|
||||
///
|
||||
/// Miscellaneous helper functions for dumping the profiler state to
|
||||
/// disk or console. We have a unified interface to avoid duplicating
|
||||
/// a bunch of complex output code for file/console output.
|
||||
///
|
||||
/// If you have a burning need to know what these guys do, just read the
|
||||
/// code. It's really pretty straightforward.
|
||||
///
|
||||
/// @{
|
||||
|
||||
/// Print data to the appropriate place(s). (Console, stream, or both.)
|
||||
static void printFunc(char *fmt, ...);
|
||||
|
||||
/// Print a sorted vector of profiler block data in a nice way.
|
||||
static void printSortedRootData(Vector<ProfilerRootData*> &rootVector, F64 totalTime);
|
||||
|
||||
/// Recursively dump part of the profiler data tree.
|
||||
static void profilerDataDumpRecurse(ProfilerData *data, char *buffer, U32 bufferLen, F64 totalTime);
|
||||
|
||||
/// @}
|
||||
|
||||
/// We tend to encounter new blocks part-way through execution. This is
|
||||
/// due to the nature of static variables (which our global roots are).
|
||||
/// They only initialize when they come into scope! So, we have to be able
|
||||
/// to grow our local root data table. This ensures it has at least
|
||||
/// newCount slows in it, and makes sure everything points to the right
|
||||
/// roots.
|
||||
void growRoots(U32 newCount);
|
||||
|
||||
enum
|
||||
{
|
||||
MaxStackDepth = 256,
|
||||
DumpFileNameLength = 256
|
||||
};
|
||||
|
||||
/// ID of the thread we have profiled data for.
|
||||
U32 mThreadID;
|
||||
|
||||
/// Mutex to manage some nasty conditions - we lock on deconstruction,
|
||||
/// and also when we're dumping so we don't try to dump from two places
|
||||
/// at the same time.
|
||||
///
|
||||
/// @note This isn't bullet proof yet on the deconstruct case, but we
|
||||
/// don't actually kill off profiler data so we're ok. :)
|
||||
void *mMutex;
|
||||
|
||||
/// Thread-local table of all the ProfilerRootDatas.
|
||||
///
|
||||
/// Since we have to keep track of per-block data specific to each
|
||||
/// thread (it's a pain to update, nevermind interpret data global to
|
||||
/// all threads), we keep a local copy of everything here so we're
|
||||
/// essentially independent of the ProfilerRoots scattered throughout
|
||||
/// the codebase.
|
||||
ProfilerRootData *mRootTable;
|
||||
|
||||
/// Number of roots we have current allocated. Since not all roots are
|
||||
/// known (they get initialized as they're visited...), we have to be
|
||||
/// able to reallocate our thread-local root table. This isn't much fun
|
||||
/// but enables us to be much more robust.
|
||||
///
|
||||
/// @see growRoots
|
||||
U32 mRootCount;
|
||||
|
||||
/// Our current ProfilerData. This indicates where in the profiler tree
|
||||
/// we are at the moment, and is updated on (almost) every push or pop.
|
||||
ProfilerData *mCurrentProfilerData;
|
||||
|
||||
/// List of all allocated ProfilerDatas. Important for book-keeping
|
||||
/// and update operations.
|
||||
ProfilerData *mProfileList;
|
||||
|
||||
/// This is the root ProfilerData in our local tree of profiler data.
|
||||
ProfilerData *mRootProfilerData;
|
||||
|
||||
/// Currently enabled?
|
||||
bool mEnabled;
|
||||
|
||||
/// Current depth of profiler stack.
|
||||
S32 mStackDepth;
|
||||
|
||||
/// Maximum allowed profiler stack depth.
|
||||
U32 mMaxStackDepth;
|
||||
|
||||
/// Enable state for next frame.
|
||||
bool mNextEnable;
|
||||
|
||||
/// Should we dump to console at next frame?
|
||||
bool mDumpToConsole;
|
||||
|
||||
/// Should we dump to file at next frame?
|
||||
bool mDumpToFile;
|
||||
|
||||
/// If so, where?
|
||||
char mDumpFileName[DumpFileNameLength];
|
||||
|
||||
/// Next allocated ProfilerInstance.
|
||||
ProfilerInstance *mNextInstance;
|
||||
|
||||
/// Do a bunch of extra checking to make sure our data structures
|
||||
/// are clean.
|
||||
///
|
||||
/// @note This is mostly kept around for debug situations, and is not
|
||||
/// normally called.
|
||||
void validate();
|
||||
|
||||
/// Look up the ProfilerRootData that corresponds to a given
|
||||
/// ProfilerRoot.(
|
||||
inline ProfilerRootData *lookup(ProfilerRoot *pr)
|
||||
{
|
||||
return mRootTable + pr->mID;
|
||||
}
|
||||
|
||||
/// Helper function to do the actual dumping. You should have the
|
||||
/// instance's mutex before you call this.
|
||||
void dump();
|
||||
|
||||
public:
|
||||
ProfilerInstance();
|
||||
~ProfilerInstance();
|
||||
|
||||
void hashPush(ProfilerRoot *pr);
|
||||
void hashPop();
|
||||
|
||||
void enable(bool enable);
|
||||
void enableMarker(const char *name, bool enable);
|
||||
void reset();
|
||||
void dumpToConsole();
|
||||
void dumpToFile(const char *fileName);
|
||||
};
|
||||
|
||||
#endif /* TORQUE_ENABLE_PROFILER */
|
||||
|
||||
#endif /* _PROFILER_H_ */
|
||||
Reference in New Issue
Block a user