exporting patch:
# HG changeset patch
# User Daniel Kraft
# Date 1313078123 -7200
# Node ID a451ae847adba4df8ad5e0cacd3a511ec6ec2757
# Parent 1be5f06d9fa77a4e6d8ab122612db915d060f27c
Collect hierarchical data profile.
* profiler.h: Retructure whole data collection.
* profiler.cc: Ditto for implementation.
diff -r 1be5f06d9fa7 -r a451ae847adb src/profiler.cc
--- a/src/profiler.cc Tue Aug 09 23:23:59 2011 -0400
+++ b/src/profiler.cc Thu Aug 11 17:55:23 2011 +0200
@@ -24,6 +24,7 @@
#include
#endif
+#include
#include
#include "defun.h"
@@ -56,11 +57,8 @@
parents (), children ()
{}
-// With the help of a mapping name -> index, convert a function_set list
-// to an Octave array of indices.
octave_value
-profile_data_accumulator::stats::function_set_value (const function_set& list,
- const fcn_index_map& idx)
+profile_data_accumulator::stats::function_set_value (const function_set& list)
{
const octave_idx_type n = list.size ();
@@ -68,9 +66,7 @@
octave_idx_type i = 0;
for (function_set::const_iterator p = list.begin (); p != list.end (); ++p)
{
- fcn_index_map::const_iterator q = idx.find (*p);
- assert (q != idx.end ());
- retval (i) = q->second;
+ retval(i) = *p;
++i;
}
assert (i == n);
@@ -78,19 +74,144 @@
return retval;
}
+profile_data_accumulator::tree_node::tree_node (tree_node* p, octave_idx_type f)
+ : parent (p), fcn_id (f), children (), time (0.0), calls (0)
+{}
+
+profile_data_accumulator::tree_node::~tree_node ()
+{
+ for (child_map::iterator i = children.begin (); i != children.end (); ++i)
+ delete i->second;
+}
+
+profile_data_accumulator::tree_node*
+profile_data_accumulator::tree_node::enter (octave_idx_type fcn)
+{
+ tree_node* retval;
+
+ child_map::iterator pos = children.find (fcn);
+ if (pos == children.end ())
+ {
+ retval = new tree_node (this, fcn);
+ children[fcn] = retval;
+ }
+ else
+ retval = pos->second;
+
+ ++retval->calls;
+ return retval;
+}
+
+profile_data_accumulator::tree_node*
+profile_data_accumulator::tree_node::exit (octave_idx_type fcn)
+{
+ assert (parent);
+ assert (fcn_id == fcn);
+
+ return parent;
+}
+
+void
+profile_data_accumulator::tree_node::build_flat (flat_profile& data) const
+{
+ // If this is not the top-level node, update profile entry for this function.
+ if (fcn_id != 0)
+ {
+ stats& entry = data[fcn_id - 1];
+
+ entry.time += time;
+ entry.calls += calls;
+
+ assert (parent);
+ if (parent->fcn_id != 0)
+ {
+ entry.parents.insert (parent->fcn_id);
+ data[parent->fcn_id - 1].children.insert (fcn_id);
+ }
+
+ if (!entry.recursive)
+ for (const tree_node* i = parent; i; i = i->parent)
+ if (i->fcn_id == fcn_id)
+ {
+ entry.recursive = true;
+ break;
+ }
+ }
+
+ // Recurse on children.
+ for (child_map::const_iterator i = children.begin ();
+ i != children.end (); ++i)
+ i->second->build_flat (data);
+}
+
+octave_value
+profile_data_accumulator::tree_node::get_hierarchical (void) const
+{
+ /* Note that we don't generate the entry just for this node, but rather
+ a struct-array with entries for all children. This way, the top-node
+ (for which we don't want a real entry) generates already the final
+ hierarchical profile data. */
+
+ const octave_idx_type n = children.size ();
+
+ Cell rv_indices (n, 1);
+ Cell rv_times (n, 1);
+ Cell rv_calls (n, 1);
+ Cell rv_children (n, 1);
+
+ octave_idx_type i = 0;
+ for (child_map::const_iterator p = children.begin ();
+ p != children.end (); ++p)
+ {
+ const tree_node& entry = *p->second;
+
+ rv_indices(i) = octave_value (p->first);
+ rv_times(i) = octave_value (entry.time);
+ rv_calls(i) = octave_value (entry.calls);
+ rv_children(i) = entry.get_hierarchical ();
+
+ ++i;
+ }
+ assert (i == n);
+
+ Octave_map retval;
+
+ retval.assign ("Index", rv_indices);
+ retval.assign ("SelfTime", rv_times);
+ retval.assign ("NumCalls", rv_calls);
+ retval.assign ("Children", rv_children);
+
+ return retval;
+}
+
profile_data_accumulator::profile_data_accumulator ()
- : enabled (false), call_stack (), data (), last_time (-1.0)
+ : known_functions (), fcn_index (),
+ enabled (false), call_tree (NULL), last_time (-1.0)
{}
+profile_data_accumulator::~profile_data_accumulator ()
+{
+ if (call_tree)
+ delete call_tree;
+}
+
void
profile_data_accumulator::set_active (bool value)
{
- // If we enable, clear the call-stack. This ensures we freshly start
- // with collecting times now.
if (value)
{
- while (!call_stack.empty ())
- call_stack.pop_back ();
+ // Create a call-tree top-node if there isn't yet one.
+ if (!call_tree)
+ call_tree = new tree_node (NULL, 0);
+
+ // Let the top-node be the active one. This ensures we have a clean
+ // fresh start collecting times.
+ active_fcn = call_tree;
+ }
+ else
+ {
+ // Make sure we start with fresh timing if we're re-enabled later.
+ last_time = -1.0;
}
enabled = value;
@@ -101,39 +222,34 @@
{
// The enter class will check and only call us if the profiler is active.
assert (is_active ());
+ assert (call_tree);
// If there is already an active function, add to its time before
// pushing the new one.
- if (!call_stack.empty ())
+ if (active_fcn != call_tree)
add_current_time ();
- // Update non-timing related data for the function entered.
- stats& entry = data[fcn];
- ++entry.calls;
- if (!call_stack.empty ())
+ // Map the function's name to its index.
+ octave_idx_type fcn_idx;
+ fcn_index_map::iterator pos = fcn_index.find (fcn);
+ if (pos == fcn_index.end ())
{
- const std::string parent_name = call_stack.back ();
- entry.parents.insert (parent_name);
- data[parent_name].children.insert (fcn);
+ known_functions.push_back (fcn);
+ fcn_idx = known_functions.size ();
+ fcn_index[fcn] = fcn_idx;
}
- if (!entry.recursive)
- for (call_stack_type::iterator i = call_stack.begin ();
- i != call_stack.end (); ++i)
- if (*i == fcn)
- {
- entry.recursive = true;
- break;
- }
+ else
+ fcn_idx = pos->second;
- call_stack.push_back (fcn);
+ active_fcn = active_fcn->enter (fcn_idx);
last_time = query_time ();
}
void
profile_data_accumulator::exit_function (const std::string& fcn)
{
- assert (!call_stack.empty ());
- assert (fcn == call_stack.back ());
+ assert (call_tree);
+ assert (active_fcn != call_tree);
// Usually, if we are disabled this function is not even called. But the
// call disabling the profiler is an exception. So also check here
@@ -141,7 +257,9 @@
if (is_active ())
add_current_time ();
- call_stack.pop_back ();
+ fcn_index_map::iterator pos = fcn_index.find (fcn);
+ assert (pos != fcn_index.end ());
+ active_fcn = active_fcn->exit (pos->second);
// If this was an "inner call", we resume executing the parent function
// up the stack. So note the start-time for this!
@@ -157,26 +275,26 @@
return;
}
- data.clear ();
+ known_functions.clear ();
+ fcn_index.clear ();
+
+ if (call_tree)
+ {
+ delete call_tree;
+ call_tree = NULL;
+ }
+
last_time = -1.0;
}
octave_value
-profile_data_accumulator::get_data (void) const
+profile_data_accumulator::get_flat (void) const
{
- const octave_idx_type n = data.size ();
+ const octave_idx_type n = known_functions.size ();
- // For the parent/child data, we need to map function key-names
- // to the indices they correspond to in the output array. Find them out
- // in a preparation step.
- fcn_index_map fcn_indices;
- octave_idx_type i = 0;
- for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p)
- {
- fcn_indices[p->first] = i + 1;
- ++i;
- }
- assert (i == n);
+ flat_profile flat (n);
+ assert (call_tree);
+ call_tree->build_flat (flat);
Cell rv_names (n, 1);
Cell rv_times (n, 1);
@@ -185,21 +303,15 @@
Cell rv_parents (n, 1);
Cell rv_children (n, 1);
- i = 0;
- for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p)
+ for (octave_idx_type i = 0; i != n; ++i)
{
- const stats& entry = p->second;
-
- rv_names (i) = octave_value (p->first);
- rv_times (i) = octave_value (entry.time);
- rv_calls (i) = octave_value (entry.calls);
- rv_recursive (i) = octave_value (entry.recursive);
- rv_parents (i) = stats::function_set_value (entry.parents, fcn_indices);
- rv_children (i) = stats::function_set_value (entry.children, fcn_indices);
-
- ++i;
+ rv_names(i) = octave_value (known_functions[i]);
+ rv_times(i) = octave_value (flat[i].time);
+ rv_calls(i) = octave_value (flat[i].calls);
+ rv_recursive(i) = octave_value (flat[i].recursive);
+ rv_parents(i) = stats::function_set_value (flat[i].parents);
+ rv_children(i) = stats::function_set_value (flat[i].children);
}
- assert (i == n);
Octave_map retval;
@@ -213,6 +325,13 @@
return retval;
}
+octave_value
+profile_data_accumulator::get_hierarchical (void) const
+{
+ assert (call_tree);
+ return call_tree->get_hierarchical ();
+}
+
double
profile_data_accumulator::query_time (void) const
{
@@ -226,14 +345,8 @@
const double t = query_time ();
assert (last_time >= 0.0 && last_time <= t);
- assert (!call_stack.empty ());
- const std::string name = call_stack.back ();
-
- // The entry for this function should already be created; namely
- // when entering the function via the non-timing data collection!
- stats_map::iterator pos = data.find (name);
- assert (pos != data.end ());
- pos->second.time += t - last_time;
+ assert (call_tree && active_fcn != call_tree);
+ active_fcn->add_time (t - last_time);
}
profile_data_accumulator profiler;
@@ -283,7 +396,7 @@
}
// Query the timings collected by the profiler.
-DEFUN (__profiler_data, args, ,
+DEFUN (__profiler_data, args, nargout,
"-*- texinfo -*-\n\
@deftypefn {Function File} __profiler_data ()\n\
Undocumented internal function.\n\
@@ -295,7 +408,9 @@
if (nargin > 0)
warning ("profiler_data: ignoring extra arguments");
- retval(0) = profiler.get_data ();
+ retval(0) = profiler.get_flat ();
+ if (nargout > 1)
+ retval(1) = profiler.get_hierarchical ();
return retval;
}
diff -r 1be5f06d9fa7 -r a451ae847adb src/profiler.h
--- a/src/profiler.h Tue Aug 09 23:23:59 2011 -0400
+++ b/src/profiler.h Thu Aug 11 17:55:23 2011 +0200
@@ -40,72 +40,113 @@
// functions in a manner protected from stack unwinding.
class enter
{
- private:
+ private:
- profile_data_accumulator& acc;
+ profile_data_accumulator& acc;
+ std::string fcn;
- std::string fcn;
+ public:
- public:
+ enter (profile_data_accumulator&, const std::string&);
+ virtual ~enter (void);
- enter (profile_data_accumulator&, const std::string&);
+ private:
- virtual ~enter (void);
-
- private:
-
- // No copying!
-
- enter (const enter&);
-
- enter& operator = (const enter&);
+ // No copying!
+ enter (const enter&);
+ enter& operator = (const enter&);
};
profile_data_accumulator (void);
+ virtual ~profile_data_accumulator ();
bool is_active (void) const { return enabled; }
-
void set_active (bool);
void reset (void);
- octave_value get_data (void) const;
+ octave_value get_flat (void) const;
+ octave_value get_hierarchical (void) const;
private:
- typedef std::set function_set;
+ // One entry in the flat profile (i.e., a collection of data for a single
+ // function). This is filled in when building the flat profile from the
+ // hierarchical call tree.
+ struct stats
+ {
+ stats ();
+
+ double time;
+ unsigned calls;
+
+ bool recursive;
+
+ typedef std::set function_set;
+ function_set parents;
+ function_set children;
+
+ // Convert a function_set list to an Octave array of indices.
+ static octave_value function_set_value (const function_set&);
+ };
+
+ typedef std::vector flat_profile;
+
+ // Store data for one node in the call-tree of the hierarchical profiler
+ // data we collect.
+ class tree_node
+ {
+ public:
+
+ tree_node (tree_node*, octave_idx_type);
+ virtual ~tree_node ();
+
+ void add_time (double dt) { time += dt; }
+
+ // Enter a child function. It is created in the list of children if it
+ // wasn't already there. The now-active child node is returned.
+ tree_node* enter (octave_idx_type);
+
+ // Exit function. As a sanity-check, it is verified that the currently
+ // active function actually is the one handed in here. Returned is the
+ // then-active node, which is our parent.
+ tree_node* exit (octave_idx_type);
+
+ void build_flat (flat_profile&) const;
+ octave_value get_hierarchical (void) const;
+
+ private:
+
+ tree_node* parent;
+ octave_idx_type fcn_id;
+
+ typedef std::map child_map;
+ child_map children;
+
+ // This is only time spent *directly* on this level, excluding children!
+ double time;
+
+ unsigned calls;
+
+ // No copying!
+ tree_node (const tree_node&);
+ tree_node& operator = (const tree_node&);
+ };
+
+ // Each function we see in the profiler is given a unique index (which
+ // simply counts starting from 1). We thus have to map profiler-names to
+ // those indices. For all other stuff, we identify functions by their index.
+
+ typedef std::vector function_set;
typedef std::map fcn_index_map;
- // Store some statistics data collected for a function.
- class stats
- {
- private:
-
- double time;
- unsigned calls;
-
- bool recursive;
-
- function_set parents;
- function_set children;
-
- public:
-
- stats ();
-
- static octave_value
- function_set_value (const function_set&, const fcn_index_map&);
-
- friend class profile_data_accumulator;
- };
+ function_set known_functions;
+ fcn_index_map fcn_index;
bool enabled;
- typedef std::vector call_stack_type;
- call_stack_type call_stack;
-
- typedef std::map stats_map;
- stats_map data;
+ tree_node* call_tree;
+ tree_node* active_fcn;
// Store last timestamp we had, when the currently active function was called.
double last_time;
@@ -117,19 +158,17 @@
// Query a timestamp, used for timing calls (obviously).
// This is not static because in the future, maybe we want a flag
- // in the profiler or something to choose between cputime, wall-time
+ // in the profiler or something to choose between cputime, wall-time,
// user-time, system-time, ...
double query_time () const;
- // Add the time elapsed since last_time to the function on the top
- // of our call-stack. This is called from two different positions,
- // thus it is useful to have it as a seperate function.
+ // Add the time elapsed since last_time to the function we're currently in.
+ // This is called from two different positions, thus it is useful to have
+ // it as a seperate function.
void add_current_time (void);
// No copying!
-
profile_data_accumulator (const profile_data_accumulator&);
-
profile_data_accumulator& operator = (const profile_data_accumulator&);
};