# # patch "ChangeLog" # from [26339e508956ca97d4b7348a8adf035c90b7f282] # to [aab92dc5ce057e81ca6e5a919e5489aed4e1eb05] # # patch "cset.cc" # from [c6fb354bb5d0106ac7f8a180a062f8c0dbae8572] # to [da1e125876b92306a552da0e76088c448ea75310] # ======================================================================== --- ChangeLog 26339e508956ca97d4b7348a8adf035c90b7f282 +++ ChangeLog aab92dc5ce057e81ca6e5a919e5489aed4e1eb05 @@ -1,3 +1,7 @@ +2005-08-18 graydon hoare + + * cset.cc: Rewrite automaton, some more (slow) progress. + 2005-08-10 graydon hoare * cset.cc: Automaton test gets a few hundred edits in. ======================================================================== --- cset.cc c6fb354bb5d0106ac7f8a180a062f8c0dbae8572 +++ cset.cc da1e125876b92306a552da0e76088c448ea75310 @@ -24,10 +24,10 @@ A cset is a change_consumer (see below) and also an object which can be analyzed to *feed* a change_consumer. Structurally, it is: - - a pair of mfests "old" and "new" + - a pair of mfests "old" and "new" - an "incubator" set of unborn nodes - a "graveyard" set of killed nodes - - a pair of maps relating copied nodes in "new" to + - a pair of maps relating copied nodes in "new" to their corresponding pre-copy node in "old" - a pair of maps connecting nodes to their parents in "old" and "new" (these cannot be stored in the @@ -41,8 +41,8 @@ As well, the parents in each map should always match up with the child relationships stored in the nodes, and the new->old and old->new maps should agree. - + Change_consumers: ~~~~~~~~~~~~~~~~~ @@ -87,7 +87,7 @@ Attach and detach lists: ~~~~~~~~~~~~~~~~~~~~~~~~ - This is a subtle point. Read it until it's clear. + This is a subtle point. Read it until it's clear. Csets contain an implicit order to their operations: a block of delete actions, then renames, adds, deltas, and finally attribute @@ -114,12 +114,12 @@ Suppose we executed them "sequentially". For the first rename we would do this: - + - find the node "A/B" in the OLD mfest, call it file_0. - - find file_0 in the NEW mfest, call its parent parent_0. + - find file_0 in the NEW mfest, call its parent parent_0. - detach "B" from parent_0 in the NEW mfest. - find "P" in the NEW mfest, call it parent_1. - - attach file_0 to parent_1 under the name "Q". + - attach file_0 to parent_1 under the name "Q". This last action will fail. It will fail because parent_1 already has a child called Q in the second mfest (it's the source of the next @@ -134,12 +134,12 @@ triggers the second half; it is called after the last rename is processed. Here's how we *actually* process the renames: - - buffer all the renames into a vector of src/dst pairs, in order. + - buffer all the renames into a vector of src/dst pairs, in order. - when the user calls finalize_renames(): - build a temporary vector of nodes with the same length as the buffer - - walk the vector from end to beginning, detaching each node from + - walk the vector from end to beginning, detaching each node from the NEW tree and saving it in the temporary vector ("bottom-up") - - walk the vector from beginning to end, attaching each node to + - walk the vector from beginning to end, attaching each node to the NEW tree ("top-down") */ @@ -181,8 +181,10 @@ using std::stack; using std::string; using std::vector; +using std::back_inserter; using std::inserter; using std::copy; +using std::search; using std::make_pair; using boost::lexical_cast; using boost::shared_ptr; @@ -209,16 +211,16 @@ // life of the program. They're expected to be small and very frequently // reused. - struct + struct hash - { + { size_t operator()(string const * const & x) const - { + { return __gnu_cxx::__stl_hash_string(x->c_str()); } }; - - struct + + struct eq { bool operator()(string const * const & x, @@ -258,7 +260,7 @@ { set(""); } - + dirent_t(string const & s) { set(s); @@ -270,12 +272,12 @@ }; -dirent_t::stable_string_set +dirent_t::stable_string_set dirent_t::stable_strings; bool -operator<(dirent_t const & a, +operator<(dirent_t const & a, dirent_t const & b) { return *(a.stable_string) < *(b.stable_string); @@ -305,7 +307,6 @@ return o << *(d.stable_string); } - struct node; struct dir_node; struct file_node; @@ -316,15 +317,16 @@ typedef shared_ptr file_t; // dirmap *must* be a sorted map, do not change to a hashed map -typedef map dirmap_t; +typedef map dirmap_t; typedef string attr_name; typedef string attr_val; +typedef map attrmap_t; // this helper class represents an "unpacked" view of the path // through a tree of directory nodes to some specific leaf (either // dir or file); it is just a faster form of a non-empty file_path -struct +struct path_vec_t { vector dir; @@ -334,7 +336,7 @@ { fs::path fp(f()); path_vec_t pv; - for (fs::path::iterator i = fp.begin(); + for (fs::path::iterator i = fp.begin(); i != fp.end(); ++i) { pv.dir.push_back(dirent_t(*i)); @@ -385,7 +387,7 @@ b.push_back(pvb.leaf); return lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); - + } @@ -393,56 +395,90 @@ // nodes //================================================================== -struct +struct node { - map attrs; + attrmap_t attrs; - bool has_attrs() + node() { + // L(F("constructed node %x\n") % this); + } + + bool has_attrs() + { return !attrs.empty(); } - bool has_attr(attr_name const & name) - { + bool has_attr(attr_name const & name) + { return attrs.find(name) != attrs.end(); } - attr_val const & get_attr(attr_name const & name) - { - map::const_iterator i = attrs.find(name); + attr_val const & get_attr(attr_name const & name) + { + attrmap_t::const_iterator i = attrs.find(name); I(i != attrs.end()); return i->second; } void set_attr(attr_name const & name, - attr_val const & val) + attr_val const & val) { attrs[name] = val; } - void clear_attr(attr_name const & name) + void clear_attr(attr_name const & name) { attrs.erase(name); } virtual node_t shallow_copy() const = 0; - virtual ~node() {} + virtual ~node() + { + // L(F("destructing node %x\n") % this); + } }; -struct -file_node +namespace std +{ + +ostream & +operator<<(ostream & o, attrmap_t const & attrs) +{ + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) + { + if (i != attrs.begin()) + o << ", "; + o << "'" << i->first << "' -> '" << i->second << "'"; + } + return o; +} + +} + +struct +file_node : public node { file_id content; - + + file_node() + { + // L(F("constructed file_node %x\n") % this); + } + virtual node_t shallow_copy() const; - virtual ~file_node() {} + virtual ~file_node() + { + // L(F("destructing file_node %x\n") % this); + } }; -node_t +node_t file_node::shallow_copy() const { file_t f = file_t(new file_node()); @@ -453,11 +489,16 @@ struct -dir_node +dir_node : public node { dirmap_t entries; + dir_node() + { + // L(F("constructed dir_node %x\n") % this); + } + // informative bool contains_entry(dirent_t p) const; @@ -467,11 +508,14 @@ void drop_child(dirent_t c); virtual node_t shallow_copy() const; - virtual ~dir_node() {} + virtual ~dir_node() + { + // L(F("destructing dir_node %x\n") % this); + } }; -void +void dir_node::add_child(dirent_t name, node_t n) { I(!contains_entry(name)); @@ -479,7 +523,7 @@ } -void +void dir_node::drop_child(dirent_t c) { I(contains_entry(c)); @@ -487,7 +531,7 @@ } -static inline bool +static inline bool is_dir_t(node_t n) { dir_t d = dynamic_pointer_cast(n); @@ -495,7 +539,7 @@ } -static inline bool +static inline bool is_file_t(node_t n) { file_t f = dynamic_pointer_cast(n); @@ -503,7 +547,7 @@ } -static inline dir_t +static inline dir_t downcast_to_dir_t(node_t const n) { dir_t d = dynamic_pointer_cast(n); @@ -512,7 +556,7 @@ } -static inline file_t +static inline file_t downcast_to_file_t(node_t const n) { file_t f = dynamic_pointer_cast(n); @@ -521,7 +565,7 @@ } -struct +struct dfs_iter { // NB: the dfs_iter struct *does not return* the node it's @@ -553,13 +597,13 @@ return stk.top().second->second; } - void path(path_vec_t & pv) + void path(path_vec_t & pv) { I(!finished()); pv.dir = prefix; leaf(pv.leaf); } - + void leaf(dirent_t & d) { I(!finished()); @@ -581,7 +625,7 @@ else ++(stk.top().second); - while (!stk.empty() + while (!stk.empty() && stk.top().second == stk.top().first->entries.end()) { stk.pop(); @@ -594,7 +638,7 @@ }; -struct +struct bfs_iter { deque q; @@ -633,7 +677,7 @@ }; -bool +bool dir_node::contains_entry(dirent_t p) const { map::const_iterator i = entries.find(p); @@ -643,7 +687,7 @@ } -node_t +node_t dir_node::get_entry(dirent_t p) const { dirmap_t::const_iterator i = entries.find(p); @@ -652,7 +696,7 @@ } -node_t +node_t dir_node::shallow_copy() const { dir_t d = dir_t(new dir_node()); @@ -665,57 +709,72 @@ namespace __gnu_cxx { template <> - struct hash - { - size_t + struct hash + { + size_t operator()(node_t const & x) const - { + { return reinterpret_cast(x.get()); } }; template <> - struct hash - { - size_t - operator()(node * const & x) const - { - return reinterpret_cast(x); - } - }; - - template <> - struct hash - { - size_t + struct hash + { + size_t operator()(weak_node_t const & x) const - { + { return reinterpret_cast(x.lock().get()); } }; } + bool operator==(weak_node_t const & a, weak_node_t const & b) { - return a.lock().get() == b.lock().get(); + return a.lock().get() == b.lock().get() && !(a < b || b < a); } +bool +operator==(node_t const & a, + node_t const & b) +{ + return a.get() == b.get() && !(a < b || b < a); +} + + +bool +operator==(dir_t const & a, + dir_t const & b) +{ + return a.get() == b.get() && !(a < b || b < a); +} + + +bool +operator==(file_t const & a, + file_t const & b) +{ + return a.get() == b.get() && !(a < b || b < a); +} + + //================================================================== // mfests //================================================================== -struct +struct mfest { dir_t root; - mfest() : root(new dir_node()) + mfest() : root(new dir_node()) {} - + mfest(mfest const & other); void reset(mfest const & other); void check_sane() const; @@ -725,16 +784,16 @@ bool dir_exists(file_path dp) const; // Imperative parts (fault on lookup failures). - void lookup(path_vec_t const & pth, + void lookup(path_vec_t const & pth, dir_t & parent, node_t & leaf); }; -void +void mfest::lookup(path_vec_t const & pth, dir_t & parent, node_t & leaf) { parent = root; - for (vector::const_iterator i = pth.dir.begin(); + for (vector::const_iterator i = pth.dir.begin(); i != pth.dir.end(); ++i) { parent = downcast_to_dir_t(parent->get_entry(*i)); @@ -761,17 +820,16 @@ mfest::check_sane() const { // Check for absence of cycles. - hash_set seen; + hash_set seen; for(bfs_iter i(root); !i.finished(); ++i) { - node *n = (*i).get(); - I(seen.find(n) == seen.end()); - seen.insert(n); + I(seen.find(*i) == seen.end()); + seen.insert(*i); } } -bool +bool mfest::file_exists(file_path fp) const { path_vec_t v = path_vec_t::from_file_path(fp); @@ -781,7 +839,7 @@ { if (!d->contains_entry(*i)) return false; - + node_t n = d->get_entry(*i); if (!is_dir_t(n)) @@ -789,12 +847,12 @@ d = downcast_to_dir_t(n); } - return d->contains_entry(v.leaf) + return d->contains_entry(v.leaf) && is_file_t(d->get_entry(v.leaf)); } -bool +bool mfest::dir_exists(file_path dp) const { path_vec_t v = path_vec_t::from_file_path(dp); @@ -804,7 +862,7 @@ { if (!d->contains_entry(*i)) return false; - + node_t n = d->get_entry(*i); if (!is_dir_t(n)) @@ -812,12 +870,12 @@ d = downcast_to_dir_t(n); } - return d->contains_entry(v.leaf) + return d->contains_entry(v.leaf) && is_dir_t(d->get_entry(v.leaf)); } -bool +bool operator==(mfest const & ma, mfest const & mb) { @@ -825,7 +883,7 @@ // it actually short-circuits any time it sees identical node pointers // rather than structurally comparing their subtrees. - if (ma.root.get() == mb.root.get()) + if (ma.root == mb.root) return true; deque qa, qb; @@ -844,7 +902,7 @@ // Short-circuit comparisons when we actually // have the same pointers here. - if (a.get() == b.get()) + if (a == b) continue; if (a->attrs != b->attrs) @@ -857,16 +915,16 @@ if (! (fa->content == fb->content)) return false; } - + else if (is_dir_t(a) && is_dir_t(b)) { dir_t da = downcast_to_dir_t(a); dir_t db = downcast_to_dir_t(b); - + if (da->entries.size() != da->entries.size()) return false; - - for (dirmap_t::const_iterator + + for (dirmap_t::const_iterator i = da->entries.begin(), j = db->entries.begin(); i != da->entries.end(); ++i, ++j) @@ -899,25 +957,29 @@ virtual ~change_consumer() {} - void delete_dir(file_path const & pth); + void delete_dir(file_path const & pth, + attrmap_t const & attrs); void delete_file(file_path const & pth, - file_id const & ident); - void rename_node(file_path const & src, + file_id const & ident, + attrmap_t const & attrs); + void rename_node(file_path const & src, file_path const & dst); - void add_dir(file_path const & dp); + void add_dir(file_path const & dp, + attrmap_t const & attrs); void add_file(file_path const & fp, - file_id const & ident); + file_id const & ident, + attrmap_t const & attrs); - void apply_delta(file_path const & path, - file_id const & src, + void apply_delta(file_path const & path, + file_id const & src, file_id const & dst); - void set_attr(file_path const & path, - attr_name const & name, + void set_attr(file_path const & path, + attr_name const & name, attr_val const & val); - void clear_attr(file_path const & path, + void clear_attr(file_path const & path, attr_name const & name); virtual void finalize_renames() {} @@ -926,47 +988,53 @@ // composing or parsing file_paths in their string-y form. virtual void delete_file(path_vec_t const & path, - file_id const & ident) = 0; - virtual void delete_dir(path_vec_t const & path) = 0; + file_id const & ident, + attrmap_t const & attrs) = 0; + virtual void delete_dir(path_vec_t const & path, + attrmap_t const & attrs) = 0; - virtual void rename_node(path_vec_t const & src, + virtual void rename_node(path_vec_t const & src, path_vec_t const & dst) = 0; - - virtual void add_dir(path_vec_t const & dp) = 0; + + virtual void add_dir(path_vec_t const & dp, + attrmap_t const & attrs) = 0; virtual void add_file(path_vec_t const & fp, - file_id const & ident) = 0; + file_id const & ident, + attrmap_t const & attrs) = 0; - virtual void apply_delta(path_vec_t const & path, - file_id const & src, + virtual void apply_delta(path_vec_t const & path, + file_id const & src, file_id const & dst) = 0; - virtual void set_attr(path_vec_t const & path, - attr_name const & name, + virtual void set_attr(path_vec_t const & path, + attr_name const & name, attr_val const & val) = 0; - virtual void clear_attr(path_vec_t const & path, + virtual void clear_attr(path_vec_t const & path, attr_name const & name) = 0; }; -void -change_consumer::delete_dir(file_path const & dp) +void +change_consumer::delete_dir(file_path const & dp, + attrmap_t const & attrs) { - L(F("delete_dir('%s')\n") % dp); - this->delete_dir(path_vec_t::from_file_path(dp)); + L(F("delete_dir('%s', [%s])\n") % dp % attrs); + this->delete_dir(path_vec_t::from_file_path(dp), attrs); } -void +void change_consumer::delete_file(file_path const & fp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { I(!ident.inner()().empty()); - L(F("delete_file('%s', '%s')\n") % fp % ident); - this->delete_file(path_vec_t::from_file_path(fp), ident); + L(F("delete_file('%s', [%s], [%s])\n") % fp % ident % attrs); + this->delete_file(path_vec_t::from_file_path(fp), ident, attrs); } -void +void change_consumer::rename_node(file_path const & a, file_path const & b) { @@ -976,27 +1044,29 @@ } -void -change_consumer::add_dir(file_path const & dp) +void +change_consumer::add_dir(file_path const & dp, + attrmap_t const & attrs) { - L(F("add_dir('%s')\n") % dp); - this->add_dir(path_vec_t::from_file_path(dp)); + L(F("add_dir('%s', [%s])\n") % dp % attrs); + this->add_dir(path_vec_t::from_file_path(dp), attrs); } -void +void change_consumer::add_file(file_path const & fp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { - L(F("add_file('%s', '%s')\n") % fp % ident); + L(F("add_file('%s', [%s], [%s])\n") % fp % ident % attrs); I(!ident.inner()().empty()); - this->add_file(path_vec_t::from_file_path(fp), ident); + this->add_file(path_vec_t::from_file_path(fp), ident, attrs); } -void -change_consumer::apply_delta(file_path const & path, - file_id const & src, +void +change_consumer::apply_delta(file_path const & path, + file_id const & src, file_id const & dst) { L(F("apply_delta('%s', [%s], [%s])\n") % path % src % dst); @@ -1006,9 +1076,9 @@ } -void -change_consumer::set_attr(file_path const & path, - attr_name const & name, +void +change_consumer::set_attr(file_path const & path, + attr_name const & name, attr_val const & val) { L(F("set_attr('%s', '%s', '%s')\n") % path % name % val); @@ -1016,8 +1086,8 @@ } -void -change_consumer::clear_attr(file_path const & path, +void +change_consumer::clear_attr(file_path const & path, attr_name const & name) { L(F("clear_attr('%s', '%s')\n") % path % name); @@ -1029,7 +1099,7 @@ // attach_detach_change_consumers //================================================================== -struct +struct attach_detach_change_consumer : public change_consumer { @@ -1038,42 +1108,44 @@ virtual node_t detach(path_vec_t const & path) = 0; virtual void attach(path_vec_t const & path, node_t id) = 0; - // Renames are directed into a temporary buffer, which + // Renames are directed into a temporary buffer, which // then executes them in two steps using the lower-level API // "attach" and "detach". vector< pair > pending_renames; - virtual void rename_node(path_vec_t const & src, + virtual void rename_node(path_vec_t const & src, path_vec_t const & dst); virtual void finalize_renames(); }; -void -attach_detach_change_consumer::rename_node(path_vec_t const & src, +void +attach_detach_change_consumer::rename_node(path_vec_t const & src, path_vec_t const & dst) { pending_renames.push_back(make_pair(src, dst)); } -void +void attach_detach_change_consumer::finalize_renames() { vector > tmp; - - for (vector< pair >::reverse_iterator i = pending_renames.rbegin(); + + for (vector< pair >::reverse_iterator i = pending_renames.rbegin(); i != pending_renames.rend(); ++i) { tmp.push_back(make_pair(i->second, detach(i->first))); } - for (vector< pair >::const_iterator i = tmp.begin(); + for (vector< pair >::const_iterator i = tmp.begin(); i != tmp.end(); ++i) { attach(i->first, i->second); } + + pending_renames.clear(); } @@ -1083,10 +1155,10 @@ //================================================================== -struct -cset +struct +cset : public attach_detach_change_consumer -{ +{ hash_set incubator; mfest old_mfest; @@ -1102,17 +1174,17 @@ void get_old_path(node_t const & leaf, path_vec_t & pth) const; void get_new_path(node_t const & leaf, path_vec_t & pth) const; - void set_old_parent(node_t const & child, - dir_t const & parent, + void set_old_parent(node_t const & child, + dir_t const & parent, dirent_t name_in_parent); - void set_new_parent(node_t const & child, - dir_t const & parent, + void set_new_parent(node_t const & child, + dir_t const & parent, dirent_t name_in_parent); - void get_old_parent(node_t const & child, - dir_t & parent, + void get_old_parent(node_t const & child, + dir_t & parent, dirent_t & name_in_parent) const; - void get_new_parent(node_t const & child, - dir_t & parent, + void get_new_parent(node_t const & child, + dir_t & parent, dirent_t & name_in_parent) const; // Fields and accessors related to the new <-> old bijection. @@ -1120,18 +1192,19 @@ hash_map old_to_new; hash_map new_to_old; - void update_old_new_mapping(weak_node_t old, + void update_old_new_mapping(weak_node_t old, weak_node_t modified_new); - void get_corresponding_new_weak_node(weak_node_t old_or_unborn, + void get_corresponding_new_weak_node(weak_node_t old_or_unborn, weak_node_t & new_or_killed) const; - void get_corresponding_old_weak_node(weak_node_t new_or_killed, + void get_corresponding_old_weak_node(weak_node_t new_or_killed, weak_node_t & old_or_unborn) const; - void get_corresponding_new_node(weak_node_t old_or_unborn, + void get_corresponding_new_node(weak_node_t old_or_unborn, node_t & new_or_killed) const; - void get_corresponding_old_node(weak_node_t new_or_killed, + void get_corresponding_old_node(weak_node_t new_or_killed, node_t & old_or_unborn) const; // Accessors related to copy-on-write. + node_t form_new_node(node_t n); node_t unique_entry_in_new_dir(dir_t new_dir, dirent_t entry); node_t get_writable_node(path_vec_t const & pth); dir_t get_writable_dir_containing(path_vec_t const & pth); @@ -1161,32 +1234,36 @@ virtual void finalize_renames(); - virtual void delete_dir(path_vec_t const & dp); + virtual void delete_dir(path_vec_t const & dp, + attrmap_t const & attrs); virtual void delete_file(path_vec_t const & dp, - file_id const & ident); + file_id const & ident, + attrmap_t const & attrs); virtual node_t detach(path_vec_t const & path); virtual void attach(path_vec_t const & path, node_t id); - virtual void add_dir(path_vec_t const & dp); + virtual void add_dir(path_vec_t const & dp, + attrmap_t const & attrs); virtual void add_file(path_vec_t const & fp, - file_id const & ident); + file_id const & ident, + attrmap_t const & attrs); - virtual void apply_delta(path_vec_t const & path, - file_id const & src, + virtual void apply_delta(path_vec_t const & path, + file_id const & src, file_id const & dst); - virtual void set_attr(path_vec_t const & path, - attr_name const & name, + virtual void set_attr(path_vec_t const & path, + attr_name const & name, attr_val const & val); - virtual void clear_attr(path_vec_t const & path, + virtual void clear_attr(path_vec_t const & path, attr_name const & name); }; -void +void cset::reset(mfest const & m) { incubator.clear(); @@ -1211,19 +1288,19 @@ } } -bool +bool cset::is_unborn(node_t const & n) const { return incubator.find(n) != incubator.end(); } -bool +bool cset::is_live(node_t const & n) const { return !(is_unborn(n) || is_killed(n)); } -bool +bool cset::is_killed(node_t const & n) const { return graveyard.find(n) != graveyard.end(); @@ -1239,21 +1316,21 @@ I(!i->first.expired()); I(!i->second.expired()); hash_map::const_iterator j = b.find(i->second); - I(j != b.end()); + I(j != b.end()); I(!j->first.expired()); I(!j->second.expired()); - I(j->first.lock().get() == i->second.lock().get()); - I(j->second.lock().get() == i->first.lock().get()); + I(j->first == i->second); + I(j->second == i->first); } } -void +void cset::check_sane() const { old_mfest.check_sane(); new_mfest.check_sane(); - + confirm_weak_maps_agree(new_to_old, old_to_new); confirm_weak_maps_agree(old_to_new, new_to_old); @@ -1265,11 +1342,14 @@ I(is_live(*i)); dir_t parent, indexed_parent; dirent_t entry, indexed_entry; + // path_vec_t pv; i.cwd(parent); i.leaf(entry); + // i.path(pv); + // L(F("sanity pass 1: checking %s\n") % pv.to_file_path()); get_old_parent(*i, indexed_parent, indexed_entry); I(entry == indexed_entry); - I(parent.get() == indexed_parent.get()); + I(parent == indexed_parent); } for (dfs_iter i(new_mfest.root); !i.finished(); ++i) @@ -1277,8 +1357,11 @@ I(is_live(*i)); dir_t parent, indexed_parent; dirent_t entry, indexed_entry; + // path_vec_t pv; i.cwd(parent); i.leaf(entry); + // i.path(pv); + // L(F("sanity pass 2: checking %s\n") % pv.to_file_path()); get_new_parent(*i, indexed_parent, indexed_entry); I(entry == indexed_entry); @@ -1287,68 +1370,72 @@ // the indexed parent may be the old one if there was // no copy made. // - // I(parent.get() == indexed_parent.get()); + // I(parent == indexed_parent); } - for (hash_set::const_iterator i = graveyard.begin(); + for (hash_set::const_iterator i = graveyard.begin(); i != graveyard.end(); ++i) { I(!is_unborn(*i)); I(new_to_old.find(weak_node_t(*i)) != new_to_old.end()); } - for (hash_set::const_iterator i = incubator.begin(); + for (hash_set::const_iterator i = incubator.begin(); i != incubator.end(); ++i) { I(!is_killed(*i)); I(old_to_new.find(weak_node_t(*i)) != old_to_new.end()); - } + } } -void +void cset::get_old_path(node_t const & leaf, path_vec_t & pth) const { pth.dir.clear(); - dir_t parent; + dir_t parent; get_old_parent(leaf, parent, pth.leaf); + I(is_dir_t(parent)); - for (node_t child = parent; - child.get() != old_mfest.root.get(); + for (node_t child = parent; + child != old_mfest.root; child = parent) { dirent_t child_entry; get_old_parent(child, parent, child_entry); + I(is_dir_t(parent)); pth.dir.push_back(child_entry); } - reverse(pth.dir.begin(), pth.dir.end()); + reverse(pth.dir.begin(), pth.dir.end()); } -void +void cset::get_new_path(node_t const & leaf, path_vec_t & pth) const { pth.dir.clear(); - dir_t parent; + dir_t parent; get_new_parent(leaf, parent, pth.leaf); + I(is_dir_t(parent)); - for (node_t child = parent; - child.get() != new_mfest.root.get(); + for (node_t child = parent; + child != new_mfest.root; child = parent) { dirent_t child_entry; get_new_parent(child, parent, child_entry); + I(is_dir_t(parent)); pth.dir.push_back(child_entry); } - reverse(pth.dir.begin(), pth.dir.end()); + reverse(pth.dir.begin(), pth.dir.end()); } -void -cset::set_old_parent(node_t const & child, - dir_t const & parent, +void +cset::set_old_parent(node_t const & child, + dir_t const & parent, dirent_t name_in_parent) { - // L(F("set_old_parent(child=0x%x, parent=0x%x, name='%s')\n") + // L(F("set_old_parent(child=%x, parent=%x, name='%s')\n") // % child.get() // % parent.get() // % name_in_parent); @@ -1357,12 +1444,12 @@ name_in_parent); } -void -cset::set_new_parent(node_t const & child, - dir_t const & parent, +void +cset::set_new_parent(node_t const & child, + dir_t const & parent, dirent_t name_in_parent) { - // L(F("set_new_parent(child=0x%x, parent=0x%x, name='%s')\n") + // L(F("set_new_parent(child=%x, parent=%x, name='%s')\n") // % child.get() // % parent.get() // % name_in_parent); @@ -1371,14 +1458,14 @@ name_in_parent); } -void -cset::get_old_parent(node_t const & child, - dir_t & parent, +void +cset::get_old_parent(node_t const & child, + dir_t & parent, dirent_t & name_in_parent) const { - // L(F("get_old_parent(0x%x)\n") % child.get()); - - hash_map >::const_iterator i = + // L(F("get_old_parent(%x)\n") % child.get()); + + hash_map >::const_iterator i = old_parents.find(weak_node_t(child)); I(i != old_parents.end()); @@ -1386,20 +1473,20 @@ parent = downcast_to_dir_t(i->second.first.lock()); name_in_parent = i->second.second; - // L(F("get_old_parent(0x%x) -> parent=0x%x name='%s' \n") + // L(F("get_old_parent(%x) -> parent=%x name='%s' \n") // % child.get() // % parent.get() // % name_in_parent); } -void -cset::get_new_parent(node_t const & child, - dir_t & parent, +void +cset::get_new_parent(node_t const & child, + dir_t & parent, dirent_t & name_in_parent) const { - // L(F("get_new_parent(0x%x)\n") % child.get()); + // L(F("get_new_parent(%x)\n") % child.get()); - hash_map >::const_iterator i = + hash_map >::const_iterator i = new_parents.find(weak_node_t(child)); @@ -1412,7 +1499,7 @@ else get_old_parent(child, parent, name_in_parent); - // L(F("get_new_parent(0x%x) -> parent=0x%x name='%s' \n") + // L(F("get_new_parent(%x) -> parent=%x name='%s' \n") // % child.get() // % parent.get() // % name_in_parent); @@ -1426,8 +1513,8 @@ // with an "old" (or unborn) node; namely when we've done a // copy-for-write. - // L(F("re-associating old 0x%x -> new 0x%x\n") - // % old.lock().get() + // L(F("re-associating old %x -> new %x\n") + // % old.lock().get() // % modified_new.lock().get()); weak_node_t existing_new; @@ -1439,36 +1526,62 @@ node_t +cset::form_new_node(node_t n) +{ + // L(F("forming new node from shallow copy\n")); + weak_node_t old_weak; + get_corresponding_old_weak_node(n, old_weak); + node_t result = n->shallow_copy(); + update_old_new_mapping(old_weak, weak_node_t(result)); + if (is_dir_t(result)) + { + dir_t d = downcast_to_dir_t(result); + for (dirmap_t::const_iterator i = d->entries.begin(); + i != d->entries.end(); ++i) + { + set_new_parent(i->second, d, i->first); + } + } + I(result.unique()); + // L(F("formed new node from shallow copy\n")); + return result; +} + +node_t cset::unique_entry_in_new_dir(dir_t new_dir, dirent_t entry) { // NB: We do not use "get_foo_entry" here because that reproduces the // shared_ptr, and we're using "unique" to check that the shared_ptr // is non-duplicated. - + dirmap_t::const_iterator e = new_dir->entries.find(entry); I(e != new_dir->entries.end()); node_t result; + // L(F("unique_entry_in_new_dir(new_dir=%x, entry='%s'): old node=%x refcount=%d\n") + // % new_dir.get() + // % entry + // % e->second.get() + // % e->second.use_count()); + if (e->second.unique()) result = e->second; else { - weak_node_t old_weak_node; - get_corresponding_old_weak_node(e->second, old_weak_node); - result = e->second->shallow_copy(); - I(result.unique()); - update_old_new_mapping(old_weak_node, weak_node_t(result)); + result = form_new_node(e->second); new_dir->entries[entry] = result; + // L(F("unique_entry_in_new_dir(new_dir=%x, entry='%s'): setting result=%x parent to new_dir=%x\n") + // % new_dir.get() % entry % result.get() % new_dir.get()); set_new_parent(result, new_dir, entry); } - // The node should now have exactly 2 references: + // The node should now have exactly 2 references: // 'new_dir->entries[entry]' and 'result'. I(result.use_count() == 2); - return result; + return result; } - + node_t cset::get_writable_node(path_vec_t const & pth) { @@ -1499,20 +1612,31 @@ if (!new_mfest.root.unique()) { - new_mfest.root = downcast_to_dir_t(new_mfest.root->shallow_copy()); - for (dirmap_t::const_iterator i = new_mfest.root->entries.begin(); - i != new_mfest.root->entries.end(); ++i) - set_new_parent(i->second, new_mfest.root, i->first); + // L(F("get_writable_dir_containing(%s): moving new mfest forwards\n") % pth.to_file_path()); + new_mfest.root = downcast_to_dir_t(form_new_node(new_mfest.root)); } I(new_mfest.root.unique()); - + + // L(F("get_writable_dir_containing(%s): uniqifying %d dir components\n") + // % pth.to_file_path() + // % pth.dir.size()); + dir_t d = new_mfest.root; for (vector::const_iterator i = pth.dir.begin(); i != pth.dir.end(); ++i) { - d = downcast_to_dir_t(unique_entry_in_new_dir(d,*i)); + // L(F("get_writable_dir_containing(%s): uniqifying dir component: %s\n") + // % pth.to_file_path() + // % *i); + node_t n = unique_entry_in_new_dir(d,*i); + d = downcast_to_dir_t(n); } + + // L(F("get_writable_dir_containing(%s): uniqified %d dir components\n") + // % pth.to_file_path() + // % pth.dir.size()); + return d; } @@ -1531,14 +1655,14 @@ } void -cset::get_corresponding_new_weak_node(weak_node_t old_or_unborn, +cset::get_corresponding_new_weak_node(weak_node_t old_or_unborn, weak_node_t & n) const { - // L(F("get_corresponding_new_weak_node(0x%x)\n") + // L(F("get_corresponding_new_weak_node(%x)\n") // % old_or_unborn.lock().get()); - hash_map::const_iterator i + hash_map::const_iterator i = old_to_new.find(old_or_unborn); if (i == old_to_new.end()) @@ -1546,7 +1670,7 @@ else n = i->second; - // L(F("get_corresponding_new_weak_node(0x%x) -> 0x%x\n") + // L(F("get_corresponding_new_weak_node(%x) -> %x\n") // % old_or_unborn.lock().get() // % n.lock().get()); @@ -1555,13 +1679,13 @@ void -cset::get_corresponding_old_weak_node(weak_node_t new_or_killed, +cset::get_corresponding_old_weak_node(weak_node_t new_or_killed, weak_node_t & o) const { - // L(F("get_corresponding_old_weak_node(0x%x)\n") + // L(F("get_corresponding_old_weak_node(%x)\n") // % new_or_killed.lock().get()); - hash_map::const_iterator i + hash_map::const_iterator i = new_to_old.find(new_or_killed); if (i == old_to_new.end()) @@ -1569,7 +1693,7 @@ else o = i->second; - // L(F("get_corresponding_old_weak_node(0x%x) -> 0x%x\n") + // L(F("get_corresponding_old_weak_node(%x) -> %x\n") // % new_or_killed.lock().get() // % o.lock().get()); @@ -1578,7 +1702,7 @@ void -cset::get_corresponding_new_node(weak_node_t old_or_unborn, +cset::get_corresponding_new_node(weak_node_t old_or_unborn, node_t & n) const { weak_node_t tmp; @@ -1588,7 +1712,7 @@ void -cset::get_corresponding_old_node(weak_node_t new_or_killed, +cset::get_corresponding_old_node(weak_node_t new_or_killed, node_t & n) const { weak_node_t tmp; @@ -1597,7 +1721,7 @@ } -void +void cset::finalize_renames() { attach_detach_change_consumer::finalize_renames(); @@ -1609,8 +1733,9 @@ } -void -cset::delete_dir(path_vec_t const & dp) +void +cset::delete_dir(path_vec_t const & dp, + attrmap_t const & attrs) { node_t n = this->detach(dp); I(is_dir_t(n)); @@ -1621,33 +1746,36 @@ // want to delete a dir, you have to delete everything inside // the dir first. I(downcast_to_dir_t(n)->entries.empty()); + I(n->attrs == attrs); graveyard.insert(n); if (global_sanity.debug) { check_sane(); - } + } } -void +void cset::delete_file(path_vec_t const & dp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { I(!ident.inner()().empty()); node_t n = this->detach(dp); I(is_file_t(n)); I(downcast_to_file_t(n)->content == ident); + I(n->attrs == attrs); graveyard.insert(n); if (global_sanity.debug) { check_sane(); - } + } } -node_t +node_t cset::detach(path_vec_t const & path) { dir_t old_parent; @@ -1665,16 +1793,19 @@ return new_node; // NB: it is important *not* to check sanity here, as we might have - // entered a transient state where detached grandchildren are + // entered a transient state where detached grandchildren are // not pointing to the graveyard because they're about to be // re-attached. } -void +void cset::attach(path_vec_t const & path, node_t n) { + // L(F("attach: adding child %x at path '%s'") % n.get() % path.to_file_path()); dir_t new_dir = get_writable_dir_containing(path); + // L(F("attach: got writable dir %x") % new_dir.get()); + new_dir->add_child(path.leaf, n); set_new_parent(n, new_dir, path.leaf); @@ -1685,22 +1816,25 @@ } -void -cset::add_dir(path_vec_t const & dp) +void +cset::add_dir(path_vec_t const & dp, + attrmap_t const & attrs) { dir_t old_dir(new dir_node()); incubator.insert(old_dir); dir_t new_dir(downcast_to_dir_t(old_dir->shallow_copy())); + new_dir->attrs = attrs; dir_t new_parent = get_writable_dir_containing(dp); + new_parent->add_child(dp.leaf, new_dir); set_new_parent(new_dir, new_parent, dp.leaf); old_to_new[weak_node_t(old_dir)] = weak_node_t(new_dir); new_to_old[weak_node_t(new_dir)] = weak_node_t(old_dir); - // L(F("added dir mapping old 0x%x -> new 0x%x\n") - // % old_dir.get() + // L(F("added dir mapping old %x -> new %x\n") + // % old_dir.get() // % new_dir.get()); if (global_sanity.debug) @@ -1710,9 +1844,10 @@ } -void +void cset::add_file(path_vec_t const & fp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { I(!ident.inner()().empty()); file_t old_file(new file_node()); @@ -1720,15 +1855,17 @@ file_t new_file(downcast_to_file_t(old_file->shallow_copy())); new_file->content = ident; + new_file->attrs = attrs; dir_t new_parent = get_writable_dir_containing(fp); + new_parent->add_child(fp.leaf, new_file); set_new_parent(new_file, new_parent, fp.leaf); old_to_new[weak_node_t(old_file)] = weak_node_t(new_file); new_to_old[weak_node_t(new_file)] = weak_node_t(old_file); - // L(F("added file mapping old 0x%x -> new 0x%x\n") - // % old_file.get() + // L(F("added file mapping old %x -> new %x\n") + // % old_file.get() // % new_file.get()); if (global_sanity.debug) @@ -1738,14 +1875,14 @@ } -void -cset::apply_delta(path_vec_t const & path, - file_id const & s, +void +cset::apply_delta(path_vec_t const & path, + file_id const & s, file_id const & d) { node_t old_node; file_t old_file, new_file; - + I(!s.inner()().empty()); I(!d.inner()().empty()); @@ -1763,9 +1900,9 @@ } -void -cset::set_attr(path_vec_t const & path, - attr_name const & name, +void +cset::set_attr(path_vec_t const & path, + attr_name const & name, attr_val const & val) { get_writable_node(path)->set_attr(name, val); @@ -1777,8 +1914,8 @@ } -void -cset::clear_attr(path_vec_t const & path, +void +cset::clear_attr(path_vec_t const & path, string const & name) { get_writable_node(path)->clear_attr(name); @@ -1814,21 +1951,32 @@ file_id dst_id; }; - struct path_with_content + struct path_with_attrs { path_vec_t path; + attrmap_t attrs; + bool operator<(path_with_attrs const & other) const + { + return path < other.path; + } + }; + + struct path_with_attrs_and_content + { + path_vec_t path; + attrmap_t attrs; file_id content; - bool operator<(path_with_content const & other) const + bool operator<(path_with_attrs_and_content const & other) const { return path < other.path; } }; - set files_deleted; - set dirs_deleted; + set files_deleted; + set dirs_deleted; vector nodes_renamed; - set dirs_added; - set files_added; + set dirs_added; + set files_added; vector deltas_applied; vector attrs_changed; @@ -1866,14 +2014,14 @@ void sort_for_inverse_direction() { - sort(nodes_renamed.begin(), nodes_renamed.end(), + sort(nodes_renamed.begin(), nodes_renamed.end(), rename_inverse_sorter()); sort(deltas_applied.begin(), deltas_applied.end(), delta_inverse_sorter()); sort(attrs_changed.begin(), attrs_changed.end(), attr_inverse_sorter()); } - + }; @@ -1881,31 +2029,31 @@ play_back_replay_record(replay_record const & rr, change_consumer & cc) { - for (set::reverse_iterator i + for (set::reverse_iterator i = rr.files_deleted.rbegin(); i != rr.files_deleted.rend(); ++i) - cc.delete_file(i->path, i->content); - - for (set::reverse_iterator i = rr.dirs_deleted.rbegin(); + cc.delete_file(i->path, i->content, i->attrs); + + for (set::reverse_iterator i = rr.dirs_deleted.rbegin(); i != rr.dirs_deleted.rend(); ++i) - cc.delete_dir(*i); + cc.delete_dir(i->path, i->attrs); - for (vector::const_iterator i + for (vector::const_iterator i = rr.nodes_renamed.begin(); i != rr.nodes_renamed.end(); ++i) cc.rename_node(i->src, i->dst); cc.finalize_renames(); - for (set::const_iterator i = rr.dirs_added.begin(); + for (set::const_iterator i = rr.dirs_added.begin(); i != rr.dirs_added.end(); ++i) - cc.add_dir(*i); + cc.add_dir(i->path, i->attrs); - for (set::const_iterator i + for (set::const_iterator i = rr.files_added.begin(); i != rr.files_added.end(); ++i) - cc.add_file(i->path, i->content); + cc.add_file(i->path, i->content, i->attrs); - for (vector::const_iterator i + for (vector::const_iterator i = rr.deltas_applied.begin(); i != rr.deltas_applied.end(); ++i) cc.apply_delta(i->paths.dst, i->src_id, i->dst_id); @@ -1932,32 +2080,32 @@ change_consumer & cc) { - for (set::reverse_iterator i + for (set::reverse_iterator i = rr.files_added.rbegin(); i != rr.files_added.rend(); ++i) - cc.delete_file(i->path, i->content); + cc.delete_file(i->path, i->content, i->attrs); - for (set::reverse_iterator i = rr.dirs_added.rbegin(); + for (set::reverse_iterator i = rr.dirs_added.rbegin(); i != rr.dirs_added.rend(); ++i) - cc.delete_dir(*i); + cc.delete_dir(i->path, i->attrs); - for (vector::const_iterator i + for (vector::const_iterator i = rr.nodes_renamed.begin(); i != rr.nodes_renamed.end(); ++i) cc.rename_node(i->dst, i->src); cc.finalize_renames(); - for (set::const_iterator i = rr.dirs_deleted.begin(); + for (set::const_iterator i = rr.dirs_deleted.begin(); i != rr.dirs_deleted.end(); ++i) - cc.add_dir(*i); + cc.add_dir(i->path, i->attrs); - for (set::const_iterator i + for (set::const_iterator i = rr.files_deleted.begin(); i != rr.files_deleted.end(); ++i) - cc.add_file(i->path, i->content); - + cc.add_file(i->path, i->content, i->attrs); - for (vector::const_iterator i + + for (vector::const_iterator i = rr.deltas_applied.begin(); i != rr.deltas_applied.end(); ++i) cc.apply_delta(i->paths.src, i->dst_id, i->src_id); @@ -1988,7 +2136,7 @@ // second accumulates nodes in the toplogical order they appear in // the dst map. in both cases we append any interesting nodes to // vectors which we then replay as blocks of related changes. - + for (dfs_iter i(cs.old_mfest.root); !i.finished(); ++i) { // in this pass we accumulate nodes_deleted. the "self" node is a @@ -1999,21 +2147,27 @@ cs.get_corresponding_new_node(self, other); // Cheap short circuit - if (self.get() == other.get()) + if (self == other) continue; - + if (cs.is_killed(other)) { path_vec_t pth; cs.get_old_path(self, pth); if (is_dir_t(self)) - rr.dirs_deleted.insert(pth); + { + replay_record::path_with_attrs pa; + pa.path = pth; + pa.attrs = self->attrs; + rr.dirs_deleted.insert(pa); + } else { - replay_record::path_with_content pc; + replay_record::path_with_attrs_and_content pc; pc.path = pth; pc.content = downcast_to_file_t(self)->content; + pc.attrs = self->attrs; rr.files_deleted.insert(pc); } } @@ -2033,7 +2187,7 @@ cs.get_corresponding_old_node(self, other); // Cheap short circuit - if (self.get() == other.get()) + if (self == other) continue; if (cs.is_unborn(other)) @@ -2043,18 +2197,22 @@ if (is_dir_t(self)) { I(is_dir_t(other)); - rr.dirs_added.insert(pth); + replay_record::path_with_attrs pa; + pa.path = pth; + pa.attrs = self->attrs; + rr.dirs_added.insert(pa); } else { I(is_file_t(self)); I(is_file_t(other)); - replay_record::path_with_content pc; + replay_record::path_with_attrs_and_content pc; pc.path = pth; pc.content = downcast_to_file_t(self)->content; + pc.attrs = self->attrs; rr.files_added.insert(pc); } - } + } else if (cs.is_live(other)) { dirent_t self_entry, other_entry; @@ -2088,20 +2246,22 @@ rr.deltas_applied.push_back(del); } } - - // attrs which were changed - if (self->attrs != other->attrs) + + // attrs which were changed on live nodes + if (cs.is_live(self) + && cs.is_live(other) + && (self->attrs != other->attrs)) { replay_record::live_paths pths; cs.get_old_path(other, pths.src); i.path(pths.dst); - for (map::const_iterator a = self->attrs.begin(); + for (attrmap_t::const_iterator a = self->attrs.begin(); a != self->attrs.end(); ++a) { attr_val oldval; - map::const_iterator b = other->attrs.find(a->first); + attrmap_t::const_iterator b = other->attrs.find(a->first); if (b != other->attrs.end()) oldval = b->second; @@ -2116,12 +2276,12 @@ } } - for (map::const_iterator b = other->attrs.begin(); + for (attrmap_t::const_iterator b = other->attrs.begin(); b != other->attrs.end(); ++b) { attr_val newval; - map::const_iterator a = self->attrs.find(b->first); + attrmap_t::const_iterator a = self->attrs.find(b->first); if (a != self->attrs.end()) newval = a->second; @@ -2140,7 +2300,7 @@ } -void +void cset::replay_changes(change_consumer & cc) const { replay_record rr; @@ -2149,7 +2309,7 @@ } -void +void cset::replay_inverse_changes(change_consumer & cc) const { replay_record rr; @@ -2209,7 +2369,7 @@ // in G[F.X] (or more likely, if one node is contracted into a parent // of the other, such that there is only one head), then we have // merged F.X for free: there were no conflicts on it. -// +// // - otherwise, with a contracted version of G[F.X], we let C[F.X] = // LCAD(G[F.X]) -- an aspect-specific common-ancestor -- and ask the // user for help on A[F.X], C[F.X], B[F.X]. if they refuse, we fail. @@ -2244,30 +2404,46 @@ } -static void +static void parse_cset(basic_io::parser & parser, change_consumer & cc) { while (parser.symp()) { - std::string t1, t2, t3; - if (parser.symp(syms::delete_file)) - { + std::string t1, t2, t3, k, v; + attrmap_t attrs; + if (parser.symp(syms::delete_file)) + { parser.sym(); parser.str(t1); parser.esym(syms::content); parser.hex(t2); + while (parser.symp(syms::attr)) + { + parser.sym(); + parser.str(k); + parser.str(v); + attrs.insert(make_pair(k,v)); + } cc.delete_file(file_path(t1), - file_id(t2)); + file_id(t2), + attrs); } - else if (parser.symp(syms::delete_dir)) - { + else if (parser.symp(syms::delete_dir)) + { parser.sym(); parser.str(t1); - cc.delete_dir(file_path(t1)); + while (parser.symp(syms::attr)) + { + parser.sym(); + parser.str(k); + parser.str(v); + attrs.insert(make_pair(k,v)); + } + cc.delete_dir(file_path(t1), attrs); } - else if (parser.symp(syms::rename_node)) - { + else if (parser.symp(syms::rename_node)) + { parser.sym(); parser.str(t1); parser.esym(syms::to); @@ -2275,20 +2451,35 @@ cc.rename_node(file_path(t1), file_path(t2)); } - else if (parser.symp(syms::add_file)) - { + else if (parser.symp(syms::add_file)) + { parser.sym(); parser.str(t1); parser.esym(syms::content); parser.hex(t2); + while (parser.symp(syms::attr)) + { + parser.sym(); + parser.str(k); + parser.str(v); + attrs.insert(make_pair(k,v)); + } cc.add_file(file_path(t1), - file_id(t2)); + file_id(t2), + attrs); } - else if (parser.symp(syms::add_dir)) - { + else if (parser.symp(syms::add_dir)) + { parser.sym(); parser.str(t1); - cc.add_dir(file_path(t1)); + while (parser.symp(syms::attr)) + { + parser.sym(); + parser.str(k); + parser.str(v); + attrs.insert(make_pair(k,v)); + } + cc.add_dir(file_path(t1), attrs); } else if (parser.symp(syms::patch)) { @@ -2316,7 +2507,7 @@ parser.str(t2); parser.esym(syms::value); parser.str(t3); - cc.set_attr(file_path(t1), + cc.set_attr(file_path(t1), attr_name(t2), attr_val(t3)); } @@ -2326,20 +2517,24 @@ } -struct -cset_printer +struct +cset_printer : public change_consumer { basic_io::printer & printer; cset_printer(basic_io::printer & p) : printer(p) {} virtual void delete_file(path_vec_t const & fp, - file_id const & content); - virtual void delete_dir(path_vec_t const & fp); - virtual void rename_node(path_vec_t const & src, + file_id const & content, + attrmap_t const & attrs); + virtual void delete_dir(path_vec_t const & fp, + attrmap_t const & attrs); + virtual void rename_node(path_vec_t const & src, path_vec_t const & dst); - virtual void add_dir(path_vec_t const & dp); + virtual void add_dir(path_vec_t const & dp, + attrmap_t const & attrs); virtual void add_file(path_vec_t const & fp, - file_id const & ident); + file_id const & ident, + attrmap_t const & attrs); virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst); @@ -2351,29 +2546,37 @@ }; -void +void cset_printer::delete_file(path_vec_t const & fp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { basic_io::stanza st; I(!ident.inner()().empty()); st.push_str_pair(syms::delete_file, fp.to_file_path()()); st.push_hex_pair(syms::content, ident.inner()()); + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) + st.push_str_triple (syms::attr, i->first, i->second); printer.print_stanza(st); } -void -cset_printer::delete_dir(path_vec_t const & dp) +void +cset_printer::delete_dir(path_vec_t const & dp, + attrmap_t const & attrs) { basic_io::stanza st; st.push_str_pair(syms::delete_dir, dp.to_file_path()()); + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) + st.push_str_triple (syms::attr, i->first, i->second); printer.print_stanza(st); } -void -cset_printer::rename_node(path_vec_t const & src, +void +cset_printer::rename_node(path_vec_t const & src, path_vec_t const & dst) { basic_io::stanza st; @@ -2383,30 +2586,38 @@ } -void -cset_printer::add_dir(path_vec_t const & dp) +void +cset_printer::add_dir(path_vec_t const & dp, + attrmap_t const & attrs) { basic_io::stanza st; st.push_str_pair(syms::add_dir, dp.to_file_path()()); + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) + st.push_str_triple (syms::attr, i->first, i->second); printer.print_stanza(st); } -void +void cset_printer::add_file(path_vec_t const & fp, - file_id const & ident) + file_id const & ident, + attrmap_t const & attrs) { basic_io::stanza st; I(!ident.inner()().empty()); st.push_str_pair(syms::add_file, fp.to_file_path()()); st.push_hex_pair(syms::content, ident.inner()()); + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) + st.push_str_triple (syms::attr, i->first, i->second); printer.print_stanza(st); } -void -cset_printer::apply_delta(path_vec_t const & path, - file_id const & src, +void +cset_printer::apply_delta(path_vec_t const & path, + file_id const & src, file_id const & dst) { basic_io::stanza st; @@ -2419,8 +2630,8 @@ } -void -cset_printer::clear_attr(path_vec_t const & path, +void +cset_printer::clear_attr(path_vec_t const & path, attr_name const & attr) { basic_io::stanza st; @@ -2430,8 +2641,8 @@ } -void -cset_printer::set_attr(path_vec_t const & path, +void +cset_printer::set_attr(path_vec_t const & path, attr_name const & attr, attr_val const & val) { @@ -2466,11 +2677,11 @@ basic_io::printer pr(oss); cset_printer cpr(pr); cs.replay_changes(cpr); - dat = data(oss.str()); + dat = data(oss.str()); } void -parse_mfest(basic_io::parser & p, +parse_mfest(basic_io::parser & p, mfest & m) { cset c; @@ -2478,12 +2689,20 @@ std::string pth, ident, n, v; while (p.symp()) { + attrmap_t attrs; if (p.symp(syms::dir)) { p.sym(); p.str(pth); + while (p.symp(syms::attr)) + { + p.sym(); + p.str(n); + p.str(v); + attrs.insert(make_pair(n,v)); + } // L(F("read dir %s\n") % pth); - cs.add_dir(file_path(pth)); + cs.add_dir(file_path(pth), attrs); } else if (p.symp(syms::file)) { @@ -2491,22 +2710,19 @@ p.str(pth); p.esym(syms::content); p.hex(ident); + while (p.symp(syms::attr)) + { + p.sym(); + p.str(n); + p.str(v); + attrs.insert(make_pair(n,v)); + } // L(F("read file %s\n") % pth); - cs.add_file(file_path(pth), file_id(ident)); + cs.add_file(file_path(pth), file_id(ident), attrs); } else break; - - // if we got here, we read either a dir or file - while (p.symp(syms::attr)) - { - p.sym(); - p.str(n); - p.str(v); - // L(F("read attr %s : %s = %s\n") % pth % n % v); - cs.set_attr(file_path(pth), n, v); - } } - + // now store the results into the provided mfest m.reset(c.new_mfest); } @@ -2537,7 +2753,7 @@ st.push_hex_pair(syms::content, ftmp->content.inner()()); // L(F("printing file %s\n") % fp); } - for (map::const_iterator j = curr->attrs.begin(); + for (attrmap_t::const_iterator j = curr->attrs.begin(); j != curr->attrs.end(); ++j) { I(!j->second.empty()); @@ -2545,7 +2761,7 @@ st.push_str_triple(syms::attr, j->first, j->second); } p.print_stanza(st); - } + } } @@ -2571,7 +2787,7 @@ std::ostringstream oss; basic_io::printer pr(oss); print_mfest(pr, mf); - dat = data(oss.str()); + dat = data(oss.str()); } @@ -2597,7 +2813,7 @@ static string wordchars = "abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; static unsigned tick = 0; string tmp; - do + do { tmp += wordchars[rand() % wordchars.size()]; } @@ -2615,6 +2831,16 @@ return file_id(tmp); } + attrmap_t new_attrs() + { + attrmap_t tmp; + while (tmp.size() < 3 && !flip(3)) + { + tmp.insert(make_pair(new_word(), new_word())); + } + return tmp; + } + dirent_t new_entry() { return dirent_t(new_word()); @@ -2625,257 +2851,202 @@ return (rand() % n) == 0; } - attr_name pick_attr(node_t n) + attr_name pick_attr(attrmap_t const & attrs) { - I(n->has_attrs()); vector tmp; - for (map::const_iterator i = n->attrs.begin(); - i != n->attrs.end(); ++i) + for (attrmap_t::const_iterator i = attrs.begin(); + i != attrs.end(); ++i) { tmp.push_back(i->first); } return tmp[rand() % tmp.size()]; } - enum action - { - add_a_node = 0, - delete_a_node = 1, - rename_a_node = 2, - apply_a_delta = 3, - set_an_attribute = 4, - clear_an_attribute = 5, - - number_of_actions = 6 - }; - - void inspect(mfest const & m, - bool & has_nonroot_nodes, - bool & has_attrs) + bool parent_of(path_vec_t const & p, + path_vec_t const & c) { + vector pv, cv; + copy(p.dir.begin(), p.dir.end(), back_inserter(pv)); + pv.push_back(p.leaf); - has_nonroot_nodes = false; - has_attrs = false; + copy(c.dir.begin(), c.dir.end(), back_inserter(cv)); + cv.push_back(c.leaf); - for (bfs_iter i(m.root); !i.finished(); ++i) + bool is_parent = false; + + if (pv.size() <= cv.size()) { - if ((*i).get() != m.root.get()) - has_nonroot_nodes = true; + vector::const_iterator c_anchor = + search(cv.begin(), cv.end(), + pv.begin(), pv.end()); - if ((*i)->has_attrs()) - has_attrs = true; - - if (has_nonroot_nodes - && has_attrs) - return; + is_parent = (c_anchor == cv.begin()); } + + L(F("path '%s' is%s parent of '%s'") + % p.to_file_path() + % (is_parent ? "" : " not") + % c.to_file_path()); + + return is_parent; } - - void perform_random_action(cset & c) + + bool pick_random_live_undamaged_nonroot_paths(cset & c, + bool & is_dir, + bool & dir_is_empty, + path_vec_t & old_path, + path_vec_t & new_path, + attrmap_t & attrs, + file_id & content) { - hash_set parents; - path_vec_t pv_a, pv_b; - file_path fp_a, fp_b; + size_t count = 0; + for (dfs_iter i(c.old_mfest.root); !i.finished(); ++i) + ++count; - bool has_nonroot_nodes; - bool has_attrs; + if (count == 0) + return false; - change_consumer & cs = c; + size_t choice = rand() % count; - inspect(c.new_mfest, - has_nonroot_nodes, - has_attrs); + for (dfs_iter i(c.old_mfest.root); !i.finished(); ++i) + { + if (choice-- == 0) + { + node_t n(*i), new_n; + I(c.is_live(n)); + c.get_corresponding_new_node(weak_node_t(n), new_n); - vector nodes; - get_nodes(c.new_mfest, nodes); + if (!c.is_live(new_n)) + return false; + if (n->attrs != new_n->attrs) + return false; + + if (is_file_t(n)) + { + file_t f = downcast_to_file_t(n); + file_t new_f = downcast_to_file_t(new_n); + + if (! (f->content == new_f->content)) + return false; + is_dir = false; + content = f->content; + } + else + { + is_dir = true; + dir_is_empty = downcast_to_dir_t(new_n)->entries.empty(); + } + + i.path(old_path); + c.get_new_path(new_n, new_path); + attrs = n->attrs; + return true; + } + } + return false; + } + + void perform_random_action(cset & cs) + { + change_consumer & c = cs; + bool did_something = false; - while (! did_something) - { - switch (static_cast(rand() % static_cast(number_of_actions))) + while (!did_something) + { + bool is_dir, new_dir_is_empty; + path_vec_t old_path, new_path; + attrmap_t attrs; + file_id content; + if (!pick_random_live_undamaged_nonroot_paths(cs, is_dir, new_dir_is_empty, + old_path, new_path, + attrs, content)) { - - case add_a_node: - { - node_t n = random_node(c, false, nodes, pv_a, parents); - if (is_dir_t(n) && flip()) - { + // Must add, couldn't find anything to work with + if (flip()) + c.add_dir(file_path(new_word()), new_attrs()); + else + c.add_file(file_path(new_word()), + file_id(new_ident()), + new_attrs()); + did_something = true; + } + else + { + switch (rand() % 7) + { + default: + case 0: + case 1: + case 2: + if (is_dir && flip()) // add a child of an existing entry - pv_a /= new_entry(); - } - else - { + new_path /= new_entry(); + else // add a sibling of an existing entry - pv_a.leaf = new_entry(); - } - - fp_a = pv_a.to_file_path(); + new_path.leaf = new_entry(); - if (flip()) - cs.add_dir(fp_a); - else - { - cs.add_file(fp_a, new_ident()); - } - did_something = true; - } - break; - - case delete_a_node: - { - if (has_nonroot_nodes) - { - node_t n = random_node(c, false, nodes, pv_a, parents); - - if (c.is_live(n) - && (n.get() != c.new_mfest.root.get()) - && (is_file_t(n) || (is_dir_t(n) - && downcast_to_dir_t(n)->entries.empty()))) - { - fp_a = pv_a.to_file_path(); - if (is_dir_t(n)) - cs.delete_dir(fp_a); - else - cs.delete_file(fp_a, downcast_to_file_t(n)->content); - did_something = true; - } - } - break; - - } + if (flip()) + c.add_dir(new_path.to_file_path(), new_attrs()); + else + c.add_file(new_path.to_file_path(), new_ident(), new_attrs()); + did_something = true; + break; - case rename_a_node: - { - if (has_nonroot_nodes) + case 3: + case 4: { - node_t src = random_node(c, true, nodes, pv_a, parents); - node_t dst = random_node(c, false, nodes, pv_b, parents); - - // we want to be sure we're not moving src into one of - // its own children; this is the same as saying that - // src isn't in dst's parents - - if (c.is_live(src) - && c.is_live(dst) - && (parents.find(src.get()) == parents.end())) - { - if (is_dir_t(dst)) - { - pv_b /= new_entry(); - } + bool target_is_dir, target_dir_is_empty; + path_vec_t target_old_path, target_new_path; + attrmap_t a2; + file_id f2; + if (pick_random_live_undamaged_nonroot_paths(cs, target_is_dir, target_dir_is_empty, + target_old_path, target_new_path, + a2, f2)) + { + if (target_is_dir && flip()) + target_new_path /= new_entry(); else + target_new_path.leaf = new_entry(); + + if (!(is_dir && parent_of(new_path, target_new_path))) { - pv_b.leaf = new_entry(); - } - fp_a = pv_a.to_file_path(); - fp_b = pv_b.to_file_path(); - cs.rename_node(fp_a, fp_b); - cs.finalize_renames(); - did_something = true; - } - } - } - break; - - case apply_a_delta: - { - if (has_nonroot_nodes) - { - node_t f = random_node(c, false, nodes, pv_a, parents); - if (c.is_live(f) && is_file_t(f)) - { - cs.apply_delta(pv_a.to_file_path(), - downcast_to_file_t(f)->content, - new_ident()); - did_something = true; - } - } - } - break; - - case set_an_attribute: - { - if (has_nonroot_nodes) - { - node_t n = random_node(c, false, nodes, pv_a, parents); - if (n.get() != c.new_mfest.root.get()) - { - fp_a = pv_a.to_file_path(); - cs.set_attr(fp_a, new_word(), new_word()); - did_something = true; - } - } - } - break; - - case clear_an_attribute: - { - if (has_nonroot_nodes && has_attrs) - { - node_t n = random_node(c, false, nodes, pv_a, parents); - if (n.get() != c.new_mfest.root.get()) - { - fp_a = pv_a.to_file_path(); - if (n->has_attrs()) - { - cs.clear_attr(fp_a, pick_attr(n)); + c.rename_node(old_path.to_file_path(), + target_new_path.to_file_path()); + c.finalize_renames(); did_something = true; } } } - } - break; - - case number_of_actions: - break; + break; + case 5: + if (is_dir) + { + if (new_dir_is_empty) + { + c.delete_dir(old_path.to_file_path(), attrs); + did_something = true; + } + } + else + { + c.delete_file(old_path.to_file_path(), content, attrs); + did_something = true; + } + break; + + case 6: + if (!attrs.empty() && flip()) + c.clear_attr(new_path.to_file_path(), pick_attr(attrs)); + else + c.set_attr(new_path.to_file_path(), new_word(), new_word()); + did_something = true; + break; + } } } } - - void get_nodes(mfest const & m, vector & nodes) - { - nodes.clear(); - for (bfs_iter i(m.root); !i.finished(); ++i) - nodes.push_back(*i); - } - - node_t random_node(cset const & c, - bool old, - vector & nodes, - path_vec_t & pv, - hash_set & parents) - { - parents.clear(); - - node_t result = nodes[rand() % nodes.size()]; - - node *r = (old ? c.old_mfest.root.get() : c.new_mfest.root.get()); - parents.insert(r); - - node_t tmp = result; - while (tmp.get() != r) - { - parents.insert(tmp.get()); - dir_t parent; - dirent_t entry; - if (old) - c.get_old_parent(tmp, parent, entry); - else - c.get_new_parent(tmp, parent, entry); - tmp = parent; - } - - if (result.get() != r) - if (old) - c.get_old_path(result, pv); - else - c.get_new_path(result, pv); - - return result; - } - - }; static void @@ -2886,26 +3057,48 @@ change_automaton aut; for (int i = 0; i < 1000; ++i) { + data tmp; //L(F("automaton cycle %d, changeset 1\n") % i); cset cs1(m1); aut.perform_random_action(cs1); + aut.perform_random_action(cs1); + aut.perform_random_action(cs1); + write_mfest(cs1.new_mfest, tmp); + std::cerr << "1 [[[" << tmp << "]]]" << std::endl; + //L(F("automaton cycle %d, changeset 2\n") % i); cset cs2(cs1.new_mfest); aut.perform_random_action(cs2); + aut.perform_random_action(cs2); + aut.perform_random_action(cs2); + write_mfest(cs2.new_mfest, tmp); + std::cerr << "2 [[[" << tmp << "]]]" << std::endl; + //L(F("automaton cycle %d, changeset 3\n") % i); cset cs3(cs2.new_mfest); aut.perform_random_action(cs3); + aut.perform_random_action(cs3); + aut.perform_random_action(cs3); + write_mfest(cs3.new_mfest, tmp); + std::cerr << "3 [[[" << tmp << "]]]" << std::endl; + cset csA, csB; //L(F("automaton cycle %d, concatenation: 1+2\n") % i); concatenate_changesets(cs1, cs2, csA); + write_mfest(csA.new_mfest, tmp); + std::cerr << "4 [[[" << tmp << "]]]" << std::endl; + //L(F("automaton cycle %d, concatenation: (1+2)+3\n") % i); concatenate_changesets(csA, cs3, csB); + write_mfest(csB.new_mfest, tmp); + std::cerr << "5 [[[" << tmp << "]]]" << std::endl; + m1.reset(csB.new_mfest); } } @@ -2949,28 +3142,31 @@ spin_mfest(cs2.old_mfest); spin_mfest(cs2.new_mfest); cs1 = cs2; - } + } } file_id null_file_id; -static void +static void basic_cset_test() { try { cset c; - change_consumer & cs = c; - cs.add_dir(file_path("usr")); - cs.add_dir(file_path("usr/bin")); + attrmap_t ea; + change_consumer & cs = c; + cs.add_dir(file_path("usr"), ea); + cs.add_dir(file_path("usr/bin"), ea); cs.add_file(file_path("usr/bin/cat"), - file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc")), + ea); - cs.add_dir(file_path("usr/local")); - cs.add_dir(file_path("usr/local/bin")); + cs.add_dir(file_path("usr/local"), ea); + cs.add_dir(file_path("usr/local/bin"), ea); cs.add_file(file_path("usr/local/bin/dog"), - file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"))); + file_id(hexenc("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc")), + ea); spin_cset(c); } @@ -2986,12 +3182,12 @@ -void +void add_cset_tests(test_suite * suite) { I(suite); suite->add(BOOST_TEST_CASE(&automaton_cset_test)); - suite->add(BOOST_TEST_CASE(&basic_cset_test)); + // suite->add(BOOST_TEST_CASE(&basic_cset_test)); }