exporting patch: # HG changeset patch # User Daniel Kraft # Date 1313078123 -7200 # Node ID a451ae847adba4df8ad5e0cacd3a511ec6ec2757 # Parent 1be5f06d9fa77a4e6d8ab122612db915d060f27c Collect hierarchical data profile. * profiler.h: Retructure whole data collection. * profiler.cc: Ditto for implementation. diff -r 1be5f06d9fa7 -r a451ae847adb src/profiler.cc --- a/src/profiler.cc Tue Aug 09 23:23:59 2011 -0400 +++ b/src/profiler.cc Thu Aug 11 17:55:23 2011 +0200 @@ -24,6 +24,7 @@ #include #endif +#include #include #include "defun.h" @@ -56,11 +57,8 @@ parents (), children () {} -// With the help of a mapping name -> index, convert a function_set list -// to an Octave array of indices. octave_value -profile_data_accumulator::stats::function_set_value (const function_set& list, - const fcn_index_map& idx) +profile_data_accumulator::stats::function_set_value (const function_set& list) { const octave_idx_type n = list.size (); @@ -68,9 +66,7 @@ octave_idx_type i = 0; for (function_set::const_iterator p = list.begin (); p != list.end (); ++p) { - fcn_index_map::const_iterator q = idx.find (*p); - assert (q != idx.end ()); - retval (i) = q->second; + retval(i) = *p; ++i; } assert (i == n); @@ -78,19 +74,144 @@ return retval; } +profile_data_accumulator::tree_node::tree_node (tree_node* p, octave_idx_type f) + : parent (p), fcn_id (f), children (), time (0.0), calls (0) +{} + +profile_data_accumulator::tree_node::~tree_node () +{ + for (child_map::iterator i = children.begin (); i != children.end (); ++i) + delete i->second; +} + +profile_data_accumulator::tree_node* +profile_data_accumulator::tree_node::enter (octave_idx_type fcn) +{ + tree_node* retval; + + child_map::iterator pos = children.find (fcn); + if (pos == children.end ()) + { + retval = new tree_node (this, fcn); + children[fcn] = retval; + } + else + retval = pos->second; + + ++retval->calls; + return retval; +} + +profile_data_accumulator::tree_node* +profile_data_accumulator::tree_node::exit (octave_idx_type fcn) +{ + assert (parent); + assert (fcn_id == fcn); + + return parent; +} + +void +profile_data_accumulator::tree_node::build_flat (flat_profile& data) const +{ + // If this is not the top-level node, update profile entry for this function. + if (fcn_id != 0) + { + stats& entry = data[fcn_id - 1]; + + entry.time += time; + entry.calls += calls; + + assert (parent); + if (parent->fcn_id != 0) + { + entry.parents.insert (parent->fcn_id); + data[parent->fcn_id - 1].children.insert (fcn_id); + } + + if (!entry.recursive) + for (const tree_node* i = parent; i; i = i->parent) + if (i->fcn_id == fcn_id) + { + entry.recursive = true; + break; + } + } + + // Recurse on children. + for (child_map::const_iterator i = children.begin (); + i != children.end (); ++i) + i->second->build_flat (data); +} + +octave_value +profile_data_accumulator::tree_node::get_hierarchical (void) const +{ + /* Note that we don't generate the entry just for this node, but rather + a struct-array with entries for all children. This way, the top-node + (for which we don't want a real entry) generates already the final + hierarchical profile data. */ + + const octave_idx_type n = children.size (); + + Cell rv_indices (n, 1); + Cell rv_times (n, 1); + Cell rv_calls (n, 1); + Cell rv_children (n, 1); + + octave_idx_type i = 0; + for (child_map::const_iterator p = children.begin (); + p != children.end (); ++p) + { + const tree_node& entry = *p->second; + + rv_indices(i) = octave_value (p->first); + rv_times(i) = octave_value (entry.time); + rv_calls(i) = octave_value (entry.calls); + rv_children(i) = entry.get_hierarchical (); + + ++i; + } + assert (i == n); + + Octave_map retval; + + retval.assign ("Index", rv_indices); + retval.assign ("SelfTime", rv_times); + retval.assign ("NumCalls", rv_calls); + retval.assign ("Children", rv_children); + + return retval; +} + profile_data_accumulator::profile_data_accumulator () - : enabled (false), call_stack (), data (), last_time (-1.0) + : known_functions (), fcn_index (), + enabled (false), call_tree (NULL), last_time (-1.0) {} +profile_data_accumulator::~profile_data_accumulator () +{ + if (call_tree) + delete call_tree; +} + void profile_data_accumulator::set_active (bool value) { - // If we enable, clear the call-stack. This ensures we freshly start - // with collecting times now. if (value) { - while (!call_stack.empty ()) - call_stack.pop_back (); + // Create a call-tree top-node if there isn't yet one. + if (!call_tree) + call_tree = new tree_node (NULL, 0); + + // Let the top-node be the active one. This ensures we have a clean + // fresh start collecting times. + active_fcn = call_tree; + } + else + { + // Make sure we start with fresh timing if we're re-enabled later. + last_time = -1.0; } enabled = value; @@ -101,39 +222,34 @@ { // The enter class will check and only call us if the profiler is active. assert (is_active ()); + assert (call_tree); // If there is already an active function, add to its time before // pushing the new one. - if (!call_stack.empty ()) + if (active_fcn != call_tree) add_current_time (); - // Update non-timing related data for the function entered. - stats& entry = data[fcn]; - ++entry.calls; - if (!call_stack.empty ()) + // Map the function's name to its index. + octave_idx_type fcn_idx; + fcn_index_map::iterator pos = fcn_index.find (fcn); + if (pos == fcn_index.end ()) { - const std::string parent_name = call_stack.back (); - entry.parents.insert (parent_name); - data[parent_name].children.insert (fcn); + known_functions.push_back (fcn); + fcn_idx = known_functions.size (); + fcn_index[fcn] = fcn_idx; } - if (!entry.recursive) - for (call_stack_type::iterator i = call_stack.begin (); - i != call_stack.end (); ++i) - if (*i == fcn) - { - entry.recursive = true; - break; - } + else + fcn_idx = pos->second; - call_stack.push_back (fcn); + active_fcn = active_fcn->enter (fcn_idx); last_time = query_time (); } void profile_data_accumulator::exit_function (const std::string& fcn) { - assert (!call_stack.empty ()); - assert (fcn == call_stack.back ()); + assert (call_tree); + assert (active_fcn != call_tree); // Usually, if we are disabled this function is not even called. But the // call disabling the profiler is an exception. So also check here @@ -141,7 +257,9 @@ if (is_active ()) add_current_time (); - call_stack.pop_back (); + fcn_index_map::iterator pos = fcn_index.find (fcn); + assert (pos != fcn_index.end ()); + active_fcn = active_fcn->exit (pos->second); // If this was an "inner call", we resume executing the parent function // up the stack. So note the start-time for this! @@ -157,26 +275,26 @@ return; } - data.clear (); + known_functions.clear (); + fcn_index.clear (); + + if (call_tree) + { + delete call_tree; + call_tree = NULL; + } + last_time = -1.0; } octave_value -profile_data_accumulator::get_data (void) const +profile_data_accumulator::get_flat (void) const { - const octave_idx_type n = data.size (); + const octave_idx_type n = known_functions.size (); - // For the parent/child data, we need to map function key-names - // to the indices they correspond to in the output array. Find them out - // in a preparation step. - fcn_index_map fcn_indices; - octave_idx_type i = 0; - for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p) - { - fcn_indices[p->first] = i + 1; - ++i; - } - assert (i == n); + flat_profile flat (n); + assert (call_tree); + call_tree->build_flat (flat); Cell rv_names (n, 1); Cell rv_times (n, 1); @@ -185,21 +303,15 @@ Cell rv_parents (n, 1); Cell rv_children (n, 1); - i = 0; - for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p) + for (octave_idx_type i = 0; i != n; ++i) { - const stats& entry = p->second; - - rv_names (i) = octave_value (p->first); - rv_times (i) = octave_value (entry.time); - rv_calls (i) = octave_value (entry.calls); - rv_recursive (i) = octave_value (entry.recursive); - rv_parents (i) = stats::function_set_value (entry.parents, fcn_indices); - rv_children (i) = stats::function_set_value (entry.children, fcn_indices); - - ++i; + rv_names(i) = octave_value (known_functions[i]); + rv_times(i) = octave_value (flat[i].time); + rv_calls(i) = octave_value (flat[i].calls); + rv_recursive(i) = octave_value (flat[i].recursive); + rv_parents(i) = stats::function_set_value (flat[i].parents); + rv_children(i) = stats::function_set_value (flat[i].children); } - assert (i == n); Octave_map retval; @@ -213,6 +325,13 @@ return retval; } +octave_value +profile_data_accumulator::get_hierarchical (void) const +{ + assert (call_tree); + return call_tree->get_hierarchical (); +} + double profile_data_accumulator::query_time (void) const { @@ -226,14 +345,8 @@ const double t = query_time (); assert (last_time >= 0.0 && last_time <= t); - assert (!call_stack.empty ()); - const std::string name = call_stack.back (); - - // The entry for this function should already be created; namely - // when entering the function via the non-timing data collection! - stats_map::iterator pos = data.find (name); - assert (pos != data.end ()); - pos->second.time += t - last_time; + assert (call_tree && active_fcn != call_tree); + active_fcn->add_time (t - last_time); } profile_data_accumulator profiler; @@ -283,7 +396,7 @@ } // Query the timings collected by the profiler. -DEFUN (__profiler_data, args, , +DEFUN (__profiler_data, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Function File} __profiler_data ()\n\ Undocumented internal function.\n\ @@ -295,7 +408,9 @@ if (nargin > 0) warning ("profiler_data: ignoring extra arguments"); - retval(0) = profiler.get_data (); + retval(0) = profiler.get_flat (); + if (nargout > 1) + retval(1) = profiler.get_hierarchical (); return retval; } diff -r 1be5f06d9fa7 -r a451ae847adb src/profiler.h --- a/src/profiler.h Tue Aug 09 23:23:59 2011 -0400 +++ b/src/profiler.h Thu Aug 11 17:55:23 2011 +0200 @@ -40,72 +40,113 @@ // functions in a manner protected from stack unwinding. class enter { - private: + private: - profile_data_accumulator& acc; + profile_data_accumulator& acc; + std::string fcn; - std::string fcn; + public: - public: + enter (profile_data_accumulator&, const std::string&); + virtual ~enter (void); - enter (profile_data_accumulator&, const std::string&); + private: - virtual ~enter (void); - - private: - - // No copying! - - enter (const enter&); - - enter& operator = (const enter&); + // No copying! + enter (const enter&); + enter& operator = (const enter&); }; profile_data_accumulator (void); + virtual ~profile_data_accumulator (); bool is_active (void) const { return enabled; } - void set_active (bool); void reset (void); - octave_value get_data (void) const; + octave_value get_flat (void) const; + octave_value get_hierarchical (void) const; private: - typedef std::set function_set; + // One entry in the flat profile (i.e., a collection of data for a single + // function). This is filled in when building the flat profile from the + // hierarchical call tree. + struct stats + { + stats (); + + double time; + unsigned calls; + + bool recursive; + + typedef std::set function_set; + function_set parents; + function_set children; + + // Convert a function_set list to an Octave array of indices. + static octave_value function_set_value (const function_set&); + }; + + typedef std::vector flat_profile; + + // Store data for one node in the call-tree of the hierarchical profiler + // data we collect. + class tree_node + { + public: + + tree_node (tree_node*, octave_idx_type); + virtual ~tree_node (); + + void add_time (double dt) { time += dt; } + + // Enter a child function. It is created in the list of children if it + // wasn't already there. The now-active child node is returned. + tree_node* enter (octave_idx_type); + + // Exit function. As a sanity-check, it is verified that the currently + // active function actually is the one handed in here. Returned is the + // then-active node, which is our parent. + tree_node* exit (octave_idx_type); + + void build_flat (flat_profile&) const; + octave_value get_hierarchical (void) const; + + private: + + tree_node* parent; + octave_idx_type fcn_id; + + typedef std::map child_map; + child_map children; + + // This is only time spent *directly* on this level, excluding children! + double time; + + unsigned calls; + + // No copying! + tree_node (const tree_node&); + tree_node& operator = (const tree_node&); + }; + + // Each function we see in the profiler is given a unique index (which + // simply counts starting from 1). We thus have to map profiler-names to + // those indices. For all other stuff, we identify functions by their index. + + typedef std::vector function_set; typedef std::map fcn_index_map; - // Store some statistics data collected for a function. - class stats - { - private: - - double time; - unsigned calls; - - bool recursive; - - function_set parents; - function_set children; - - public: - - stats (); - - static octave_value - function_set_value (const function_set&, const fcn_index_map&); - - friend class profile_data_accumulator; - }; + function_set known_functions; + fcn_index_map fcn_index; bool enabled; - typedef std::vector call_stack_type; - call_stack_type call_stack; - - typedef std::map stats_map; - stats_map data; + tree_node* call_tree; + tree_node* active_fcn; // Store last timestamp we had, when the currently active function was called. double last_time; @@ -117,19 +158,17 @@ // Query a timestamp, used for timing calls (obviously). // This is not static because in the future, maybe we want a flag - // in the profiler or something to choose between cputime, wall-time + // in the profiler or something to choose between cputime, wall-time, // user-time, system-time, ... double query_time () const; - // Add the time elapsed since last_time to the function on the top - // of our call-stack. This is called from two different positions, - // thus it is useful to have it as a seperate function. + // Add the time elapsed since last_time to the function we're currently in. + // This is called from two different positions, thus it is useful to have + // it as a seperate function. void add_current_time (void); // No copying! - profile_data_accumulator (const profile_data_accumulator&); - profile_data_accumulator& operator = (const profile_data_accumulator&); };