diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ebb8dabc5f..55eb8fa161 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -158,7 +158,7 @@ oldest-supported-compiler-job:
     GIT_SUBMODULE_STRATEGY: none
     # DO NOT change this version number without updating the README to reflect
     # the requirement bump.
-    COMPILER_VERSION: 9
+    COMPILER_VERSION: 10
 
 
 # We define one job to do the Docker container build
@@ -319,7 +319,11 @@ report-job:
 docs-job:
     interruptible: true
     stage: build
+    before_script:
+      - sudo apt-get -q -y update
+      - sudo apt-get -q -y install --no-upgrade doxygen graphviz rsync git
     script:
       - doc/publish-docs.sh
+    after_script: []
    
   
diff --git a/BOTS.md b/BOTS.md
new file mode 100644
index 0000000000..2c35275ac0
--- /dev/null
+++ b/BOTS.md
@@ -0,0 +1,75 @@
+# VG Project Notes
+
+## Building
+- New `.cpp` files are auto-discovered by the build.
+- Build everything with `make -j8`, or run `make obj/whatever.o` to build just one object file.
+- You may be getting errors from `clangd`. If these errors seem spurious, stop and demand a `clangd` that works properly.
+
+## Testing
+
+### Running Bash-TAP Tests
+Use `prove -v` (not `bash`) to execute Bash-TAP tests. This provides proper test harness output and better error reporting.
+
+**Important**: Run `prove` from the `test/` directory:
+```bash
+cd test
+prove -v t/26_deconstruct.t
+```
+
+### Running Unit Tests
+To run all unit tests:
+```bash
+./bin/vg test
+```
+- `./bin/vg test "[tag]"` runs tests matching a tag
+
+#### Writing Unit Tests
+- Framework: Catch v2 (header-only)
+- Include: `#include "catch.hpp"` (in `src/unittest/catch.hpp`)
+- Macros: `TEST_CASE("name", "[tags]")`, `SECTION("name")`, `REQUIRE(cond)`
+- Namespace: `vg::unittest`
+- Directory: `src/unittest/`
+
+### Running All Tests
+```bash
+make test
+```
+
+## Writing Code
+
+### HandleGraph API
+The interfaces in libhandlegraph model a bidirected sequence graph (where nodes have DNA sequences and edges can connect to either the start or end of each involved node).
+
+#### Core types
+- `handle_t` - opaque 64-bit value
+- `nid_t` - node ID type
+- `edge_t` = `pair<handle_t, handle_t>`
+
+#### Key HandleGraph methods
+- `get_handle(nid_t, bool is_reverse=false)` → `handle_t`
+- `get_id(handle_t)` → `nid_t`
+- `get_is_reverse(handle_t)` → `bool`
+- `flip(handle_t)` → `handle_t` (toggle orientation)
+- `get_sequence(handle_t)` → `string` (in handle's orientation)
+- `follow_edges(handle_t, bool go_left, iteratee)` - iterate neighbors
+- `for_each_handle(iteratee, bool parallel=false)` - iterate all nodes
+- `for_each_edge(iteratee, bool parallel=false)` - iterate all edges
+- `has_edge(handle_t left, handle_t right)` → `bool`
+
+#### MutableHandleGraph additions
+- `create_handle(string seq)` / `create_handle(string seq, nid_t id)` → `handle_t`
+- `create_edge(handle_t left, handle_t right)`
+- `destroy_handle(handle_t)` / `destroy_edge(handle_t, handle_t)`
+
+#### HandleGraph algorithms
+- Things like `topological_sort.hpp` and `copy_graph.hpp` are in `deps/libhandlegraph/src/include/handlegraph/algorithms`.
+
+#### bdsg::HashGraph
+- Header: `deps/libbdsg/bdsg/include/bdsg/hash_graph.hpp`
+- Implements MutablePathMutableHandleGraph
+- Go-to handlegraph implementation to use
+- In libbdsg
+
+### Utilities
+- `reverse_complement(string)` → `string` in `src/utility.hpp`
+
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 0000000000..1a1007d91a
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+BOTS.md
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 6506208d58..6159297339 100644
--- a/Makefile
+++ b/Makefile
@@ -104,7 +104,8 @@ ifeq ($(shell uname -s),Darwin)
     LD_UTIL_RPATH_FLAGS=""
 
     # Homebrew installs a Protobuf that uses an Abseil that is built with C++17, so we need to build with at least C++17
-    CXX_STANDARD?=17
+    # We use C++20 for the spaceship operator (<=>) and ranges
+    CXX_STANDARD?=20
 
     # We may need libraries from Macports
     ifeq ($(shell if [ -d /opt/local/lib ];then echo 1;else echo 0;fi), 1)
@@ -229,8 +230,9 @@ else
     $(info Compiler $(CXX) is assumed to be GCC)
 
 	# gbwtgraph uses inline variables and our oldest supported compiler has
-	# C++17, so we should use C++17
-    CXX_STANDARD?=17
+	# C++17, so we should use at least C++17.
+    # C++20 for spaceship operator and ranges
+    CXX_STANDARD?=20
 
     # Set an rpath for vg and dependency utils to find installed libraries
     LD_UTIL_RPATH_FLAGS="-Wl,-rpath,$(CWD)/$(LIB_DIR)"
@@ -820,7 +822,7 @@ $(INC_DIR)/dynamic/dynamic.hpp: $(DYNAMIC_DIR)/include/dynamic/*.hpp $(DYNAMIC_D
 	+mkdir -p $(INC_DIR)/dynamic && cp -r $(CWD)/$(DYNAMIC_DIR)/include/dynamic/* $(INC_DIR)/dynamic/
 
 $(INC_DIR)/sparsehash/sparse_hash_map: $(wildcard $(SPARSEHASH_DIR)/**/*.cc) $(wildcard $(SPARSEHASH_DIR)/**/*.h)
-	+cd $(SPARSEHASH_DIR) && ./autogen.sh && LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" ./configure --prefix=$(CWD) $(FILTER) && $(MAKE) $(FILTER) && $(MAKE) install
+	+cd $(SPARSEHASH_DIR) && ./autogen.sh && LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" ./configure --prefix=$(CWD) $(FILTER) && $(MAKE) src/sparsehash/internal/sparseconfig.h $(FILTER) && $(MAKE) install-data $(FILTER)
 
 $(INC_DIR)/sparsepp/spp.h: $(wildcard $(SPARSEPP_DIR)/sparsepp/*.h)
 	+cp -r $(SPARSEPP_DIR)/sparsepp $(INC_DIR)/
diff --git a/README.md b/README.md
index a3e1d5e4cd..2c616f69fe 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ On other distros, or if you do not have root access, you will need to perform th
                          liblzma-dev liblz4-dev libffi-dev libcairo-dev libboost-all-dev \
                          libzstd-dev pybind11-dev python3-pybind11 libssl-dev kmc
                          
-At present, you will need GCC version 9 or greater, with support for C++17, to compile vg. (Check your version with `gcc --version`.) GCC up to 11.4.0 is supported.
+At present, you will need GCC version 10 or greater, with support for C++20, to compile vg. (Check your version with `gcc --version`.) GCC up to 11.4.0 is supported.
 
 Other libraries may be required. Please report any build difficulties.
 
diff --git a/deps/gbwt b/deps/gbwt
index d127b9aff4..9e92e4f11b 160000
--- a/deps/gbwt
+++ b/deps/gbwt
@@ -1 +1 @@
-Subproject commit d127b9aff47f7212603e56ff3e0383a1b257e0aa
+Subproject commit 9e92e4f11bafcb1df92df9adf5991199c0f09f61
diff --git a/deps/gbwtgraph b/deps/gbwtgraph
index bcc248bff4..8649f806a1 160000
--- a/deps/gbwtgraph
+++ b/deps/gbwtgraph
@@ -1 +1 @@
-Subproject commit bcc248bff469bcb6d69a9e37be1dab43580f2417
+Subproject commit 8649f806a1fb332ad9090a76c6f0b7583219ef0a
diff --git a/deps/libbdsg b/deps/libbdsg
index e74fb663a5..cf3dce919a 160000
--- a/deps/libbdsg
+++ b/deps/libbdsg
@@ -1 +1 @@
-Subproject commit e74fb663a5f85bc1f76d159b2b3a3691ed85862f
+Subproject commit cf3dce919a0085f4cfac9c290a0f750b578f2573
diff --git a/doc/publish-docs.sh b/doc/publish-docs.sh
index c17bd438e6..c3c2f839fb 100755
--- a/doc/publish-docs.sh
+++ b/doc/publish-docs.sh
@@ -26,9 +26,26 @@ COMMIT_AUTHOR_EMAIL="anovak+vgdocbot@soe.ucsc.edu"
 # See <https://gist.github.com/nicktoumpelis/11214362#file-repo-rinse-sh-L2>
 git submodule foreach --recursive git clean -xfd
 
-# Find all the submodules that Doxygen wants to look at and make sure we have
-# those.
-cat Doxyfile  | grep "^INPUT *=" | cut -f2 -d'=' | tr ' ' '\n' | grep "^ *deps" | sed 's_ *\(deps/[^/]*\).*_\1_' | sort | uniq | xargs -n 1 git submodule update --init --recursive
+# Find all the submodules that Doxygen wants to look at and make sure we have fresh copies of them.
+#
+# The CI workspace can carry these submodules with stale origin URLs (left pointing at a local mirror
+# that is not a valid repo on this runner). `git submodule deinit` clears the working tree and the
+# superproject config entry, but it leaves the cached git directory under .git/modules/<path> in place,
+# and `git submodule update --init` then reuses that stale gitdir (stale file:// origin + stale refs)
+# instead of cloning fresh -- so the pinned commit isn't found and git falls back to a file-transport
+# fetch that is (correctly) blocked. So we remove both the working tree AND the cached gitdir, forcing a
+# clean clone from the canonical https URL in .gitmodules. Every pinned commit is a branch tip on github
+# and the runner has https access, so no local ("file") transport is ever needed; protocol.file.allow=never
+# enforces that as defense-in-depth (CVE-2022-39253). We only recurse where Doxygen needs a nested tree:
+# deps/libvgio/deps (whose only nested submodule is vgteam/libhandlegraph).
+GITDIR=$(git rev-parse --git-dir)
+DOXYGEN_DEPS=$(cat Doxyfile | grep "^INPUT *=" | cut -f2 -d'=' | tr ' ' '\n' | grep "^ *deps" | sed 's_ *\(deps/[^/]*\).*_\1_' | sort | uniq)
+for dep in ${DOXYGEN_DEPS}; do
+    git submodule deinit -f -- "${dep}" || true
+    rm -rf "${dep}" "${GITDIR}/modules/${dep}"
+done
+echo "${DOXYGEN_DEPS}" | xargs -n 1 git -c protocol.file.allow=never submodule update --init
+git -c protocol.file.allow=never submodule update --init --recursive deps/libvgio
 
 # Build the documentation.
 # Assumes we are running in the repo root.
diff --git a/src/cactus.cpp b/src/cactus.cpp
index 6179663968..49eab63294 100644
--- a/src/cactus.cpp
+++ b/src/cactus.cpp
@@ -999,8 +999,8 @@ VG cactus_to_vg(stCactusGraph* cactus_graph) {
     return vg_graph;
 }
 
-VG cactusify(VG& graph) {
-    if (graph.size() == 0) {
+VG cactusify(const PathHandleGraph& graph) {
+    if (graph.get_node_count() == 0) {
         return VG();
     }
     auto parts = handle_graph_to_cactus(graph, unordered_set<string>());
diff --git a/src/cactus.hpp b/src/cactus.hpp
index 36d53f2fab..21cfd8ebc7 100644
--- a/src/cactus.hpp
+++ b/src/cactus.hpp
@@ -46,7 +46,7 @@ VG cactus_to_vg(stCactusGraph* cactus_graph);
 
 // Convert vg into vg formatted cactus representation
 // Input graph must be sorted!
-VG cactusify(VG& graph);
+VG cactusify(const PathHandleGraph& graph);
 
 }
 
diff --git a/src/cluster.hpp b/src/cluster.hpp
index df997cc51c..cd6deab517 100644
--- a/src/cluster.hpp
+++ b/src/cluster.hpp
@@ -212,8 +212,8 @@ class MEMClusterer {
     
 protected:
     
-    class HitNode;
     class HitEdge;
+    class HitNode;
     class HitGraph;
     class DPScoreComparator;
     
@@ -232,7 +232,47 @@ class MEMClusterer {
     /// is closest to the optimal separation
     void deduplicate_cluster_pairs(vector<pair<pair<size_t, size_t>, int64_t>>& cluster_pairs, int64_t optimal_separation);
 };
+
+class MEMClusterer::HitEdge { // Edge record held in HitNode's edge vectors; defined before HitNode, which stores vector<HitEdge> by value
+public:
+    HitEdge(size_t to_idx, int32_t weight, int64_t distance) : to_idx(to_idx), weight(weight), distance(distance) {} // initializes all three fields
+    HitEdge() = default; // no default member initializers: this constructor does not itself assign any field
+    ~HitEdge() = default;
+    
+    /// Index of the node that the edge points to
+    size_t to_idx;
     
+    /// Weight for dynamic programming
+    int32_t weight;
+    
+    /// Estimated distance
+    int64_t distance;
+};
+
+class MEMClusterer::HitNode { // One MEM hit at a graph position, with its scores and its outgoing/incoming HitEdge lists
+public:
+    HitNode(const MaximalExactMatch& mem, pos_t start_pos, int32_t score) : mem(&mem), start_pos(start_pos), score(score) { } // stores the address of mem (no copy); dp_score is not set here
+    HitNode() = default; // no default member initializers: mem, score and dp_score are not assigned by this constructor
+    ~HitNode() = default;
+    
+    const MaximalExactMatch* mem; ///< Address of the MEM passed to the constructor; the MEM itself is not copied
+    
+    /// Position of GCSA hit in the graph
+    pos_t start_pos;
+    
+    /// Score of the exact match this node represents
+    int32_t score;
+    
+    /// Score used in dynamic programming (not initialized by either constructor)
+    int32_t dp_score;
+    
+    /// Edges from this node that are colinear with the read
+    vector<HitEdge> edges_from;
+    
+    /// Edges to this node that are colinear with the read
+    vector<HitEdge> edges_to;
+};
+
 class MEMClusterer::HitGraph {
 public:
     
@@ -286,46 +326,6 @@ class MEMClusterer::HitGraph {
     UnionFind components;
 };
     
-class MEMClusterer::HitNode {
-public:
-    HitNode(const MaximalExactMatch& mem, pos_t start_pos, int32_t score) : mem(&mem), start_pos(start_pos), score(score) { }
-    HitNode() = default;
-    ~HitNode() = default;
-    
-    const MaximalExactMatch* mem;
-    
-    /// Position of GCSA hit in the graph
-    pos_t start_pos;
-    
-    /// Score of the exact match this node represents
-    int32_t score;
-    
-    /// Score used in dynamic programming
-    int32_t dp_score;
-    
-    /// Edges from this node that are colinear with the read
-    vector<HitEdge> edges_from;
-    
-    /// Edges to this node that are colinear with the read
-    vector<HitEdge> edges_to;
-};
-
-class MEMClusterer::HitEdge {
-public:
-    HitEdge(size_t to_idx, int32_t weight, int64_t distance) : to_idx(to_idx), weight(weight), distance(distance) {}
-    HitEdge() = default;
-    ~HitEdge() = default;
-    
-    /// Index of the node that the edge points to
-    size_t to_idx;
-    
-    /// Weight for dynamic programming
-    int32_t weight;
-    
-    /// Estimated distance
-    int64_t distance;
-};
-
 struct MEMClusterer::DPScoreComparator {
 private:
     const vector<HitNode>& nodes;
diff --git a/src/gbwtgraph_helper.cpp b/src/gbwtgraph_helper.cpp
index e9dbfda99e..ebd15b4ed0 100644
--- a/src/gbwtgraph_helper.cpp
+++ b/src/gbwtgraph_helper.cpp
@@ -468,11 +468,20 @@ void cache_payloads(
 
     const handlegraph::HandleGraph* graph_ptr = (const handlegraph::HandleGraph*) &gbz.graph;
 
+    double total_zipcode_time = 0.0, total_decoder_time = 0.0;
+    std::atomic<uint64_t> node_count = 0;
     gbz.graph.for_each_handle([&](const handle_t& handle) {
         nid_t node_id = gbz.graph.get_id(handle);
-        ZipCode zipcode;
         pos_t pos = make_pos_t(node_id, false, 0);
-        zipcode.fill_in_zipcode_from_pos(distance_index, pos, true, graph_ptr);
+        ZipCode zipcode;
+        zipcode.fill_in_zipcode_from_pos(distance_index, pos, false, graph_ptr);
+        zipcode.fill_in_full_decoder();
+        if (++node_count % 10000 == 0 && progress) {
+            double telapsed = gbwt::readTimer() - start;
+            #pragma omp critical (cerr)
+            std::cerr << "  Cached " << node_count << " nodes in " << telapsed << "s" << std::endl;
+        }
+
         payload_t payload = zipcode.get_payload_from_zip();
         if (payload == MIPayload::NO_CODE && oversized_zipcodes != nullptr) {
             // The zipcode is too large for the payload field.
diff --git a/src/graph.cpp b/src/graph.cpp
index beca52b5e1..3f23ffef18 100644
--- a/src/graph.cpp
+++ b/src/graph.cpp
@@ -2,93 +2,6 @@
 
 namespace vg {
 
-void sort_by_id_dedup_and_clean(Graph& graph) {
-    remove_duplicates(graph); // graph is sorted here
-    remove_orphan_edges(graph);
-}
-
-void remove_duplicates(Graph& graph) {
-    remove_duplicate_nodes(graph);
-    remove_duplicate_edges(graph);
-}
-
-void remove_duplicate_edges(Graph& graph) {
-    sort_edges_by_id(graph);
-    graph.mutable_edge()->erase(std::unique(graph.mutable_edge()->begin(),
-                                            graph.mutable_edge()->end(),
-                                            [](const Edge& a, const Edge& b) {
-                                                return make_tuple(a.from(), a.to(), a.from_start(), a.to_end())
-                                                    == make_tuple(b.from(), b.to(), b.from_start(), b.to_end());
-                                            }), graph.mutable_edge()->end());
-
-}
-
-void remove_duplicate_nodes(Graph& graph) {
-    sort_nodes_by_id(graph);
-    graph.mutable_node()->erase(std::unique(graph.mutable_node()->begin(),
-                                            graph.mutable_node()->end(),
-                                            [](const Node& a, const Node& b) {
-                                                return a.id() == b.id();
-                                            }), graph.mutable_node()->end());
-}
-
-void remove_orphan_edges(Graph& graph) {
-    set<id_t> ids;
-    for (auto& node : graph.node()) {
-        ids.insert(node.id());
-    }
-    graph.mutable_edge()->erase(std::remove_if(graph.mutable_edge()->begin(),
-                                               graph.mutable_edge()->end(),
-                                               [&ids](const Edge& e) {
-                                                   return !ids.count(e.from()) || !ids.count(e.to());
-                                               }), graph.mutable_edge()->end());
-}
-
-void sort_by_id(Graph& graph) {
-    sort_nodes_by_id(graph);
-    sort_edges_by_id(graph);
-}
-
-void sort_nodes_by_id(Graph& graph) {
-    std::sort(graph.mutable_node()->begin(),
-              graph.mutable_node()->end(),
-              [](const Node& a, const Node& b) {
-                  return a.id() < b.id();
-              });
-}
-
-void sort_edges_by_id(Graph& graph) {
-    std::sort(graph.mutable_edge()->begin(),
-              graph.mutable_edge()->end(),
-              [](const Edge& a, const Edge& b) {
-                  return make_tuple(a.from(), a.to(), a.from_start(), a.to_end())
-                      < make_tuple(b.from(), b.to(), b.from_start(), b.to_end());
-              });
-}
-
-bool is_id_sortable(const Graph& graph) {
-    for (auto& edge : graph.edge()) {
-        if (edge.from() >= edge.to()) return false;
-    }
-    return true;
-}
-
-bool has_inversion(const Graph& graph) {
-    for (auto& edge : graph.edge()) {
-        if (edge.from_start() || edge.to_end()) return true;
-    }
-    return false;
-}
-
-void flip_doubly_reversed_edges(Graph& graph) {
-    for (auto& edge : *graph.mutable_edge()) {
-        if (edge.from_start() && edge.to_end()) {
-            edge.set_from_start(false);
-            edge.set_to_end(false);
-        }
-    }
-}
-    
 void from_handle_graph(const HandleGraph& from, Graph& to) {
     from.for_each_handle([&](const handle_t& h) {
         Node* node = to.add_node();
diff --git a/src/graph.hpp b/src/graph.hpp
index 964e46cceb..c85afe88ab 100644
--- a/src/graph.hpp
+++ b/src/graph.hpp
@@ -11,39 +11,6 @@ namespace vg {
 
 using namespace std;
 
-/// remove duplicates and sort by id
-void sort_by_id_dedup_and_clean(Graph& graph);
-
-/// remove duplicate nodes and edges
-void remove_duplicates(Graph& graph);
-
-/// remove duplicate edges
-void remove_duplicate_edges(Graph& graph);
-
-/// remove duplicate nodes
-void remove_duplicate_nodes(Graph& graph);
-
-/// remove edges that link to a node that is not in the graph
-void remove_orphan_edges(Graph& graph);
-
-/// order the nodes and edges in the graph by id
-void sort_by_id(Graph& graph);
-
-/// order the nodes in the graph by id
-void sort_nodes_by_id(Graph& graph);
-
-/// order the edges in the graph by id pairs
-void sort_edges_by_id(Graph& graph);
-
-/// returns true if the graph is id-sortable (no reverse links)
-bool is_id_sortable(const Graph& graph);
-
-/// returns true if we find an edge that may specify an inversion
-bool has_inversion(const Graph& graph);
-
-/// clean up doubly-reversed edges
-void flip_doubly_reversed_edges(Graph& graph);
-
 // transfer data from a HandleGraph into an empty Graph
 void from_handle_graph(const HandleGraph& from, Graph& to);
 
diff --git a/src/multipath_mapper.cpp b/src/multipath_mapper.cpp
index 74ad7718c1..f90214123c 100644
--- a/src/multipath_mapper.cpp
+++ b/src/multipath_mapper.cpp
@@ -2448,7 +2448,7 @@ namespace vg {
                     // in the left_idxs and right_idxs vectors
                     int64_t target_len = 2 * seq_len - left_side.clip_length - right_side.clip_length;
                     auto distance_diff = [&](size_t l, size_t r) {
-                        return abs<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len);
+                        return std::abs(static_cast<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len));
                     };
                     
                     // sweep to identify pairs that most nearly align
diff --git a/src/recombinator.cpp b/src/recombinator.cpp
index a9aaed4b10..07915118ed 100644
--- a/src/recombinator.cpp
+++ b/src/recombinator.cpp
@@ -1585,7 +1585,7 @@ void add_path(const gbwt::GBWT& source, gbwt::size_type path_id, gbwt::GBWTBuild
     gbwt::PathName path_name = source.metadata.path(path_id);
     std::string sample_name = source.metadata.sample(path_name.sample);
     std::string contig_name = source.metadata.contig(path_name.contig);
-    if (sample_name == gbwtgraph::REFERENCE_PATH_SAMPLE_NAME) {
+    if (sample_name == gbwtgraph::GENERIC_PATH_SAMPLE_NAME) {
         metadata.add_generic_path(contig_name);
     } else {
         // Reference samples will be copied later.
diff --git a/src/snarl_distance_index.cpp b/src/snarl_distance_index.cpp
index 01e48e62ea..7c79e424fc 100644
--- a/src/snarl_distance_index.cpp
+++ b/src/snarl_distance_index.cpp
@@ -2,6 +2,8 @@
 //#define debug_snarl_traversal
 //#define debug_distances
 //#define debug_subgraph
+//#define debug_hub_label_build
+//#define debug_hub_label_storage
 
 #include "snarl_distance_index.hpp"
 
@@ -92,7 +94,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 
     //Stores unfinished records, as type of record and offset into appropriate vector
     //(temp_node/snarl/chain_records)
-    vector<pair<SnarlDistanceIndex::temp_record_t, size_t>> stack;
+    vector<SnarlDistanceIndex::temp_record_ref_t> stack;
 
     //There may be components of the root that are connected to each other. Each connected component will
     //get put into a (fake) root-level snarl, but we don't know what those components will be initially,
@@ -113,7 +115,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 #ifdef debug_distance_indexing
         cerr << "  Starting new chain at " << graph->get_id(chain_start_handle) << (graph->get_is_reverse(chain_start_handle) ? " reverse" : " forward") << endl;
         //We shouldn't have seen this node before
-        //assert(temp_index.temp_node_records[graph->get_id(chain_start_handle)-min_node_id].node_id == 0);
+        //assert(temp_index.get_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(chain_start_handle))).node_id == 0);
 #endif
 
         //Fill in node in chain
@@ -127,7 +129,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 
 
         //And the node record itself
-        auto& temp_node = temp_index.temp_node_records.at(node_id-temp_index.min_node_id);
+        auto& temp_node = temp_index.get_node(temp_chain.children.back());
         temp_node.node_id = node_id;
         temp_node.node_length = graph->get_length(chain_start_handle);
         temp_node.reversed_in_parent = graph->get_is_reverse(chain_start_handle);
@@ -141,13 +143,13 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
          */
 
         //Done with this chain
-        pair<SnarlDistanceIndex::temp_record_t, size_t> chain_index = stack.back();
+        SnarlDistanceIndex::temp_record_ref_t chain_index = stack.back();
         stack.pop_back();
 
 #ifdef debug_distance_indexing
         assert(chain_index.first == SnarlDistanceIndex::TEMP_CHAIN);
 #endif
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records.at(chain_index.second);
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(chain_index);
         nid_t node_id = graph->get_id(chain_end_handle);
 
         if (temp_chain_record.children.size() == 1 && node_id == temp_chain_record.start_node_id) {
@@ -159,7 +161,8 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 #endif
 
             //Get the node
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records.at(node_id - temp_index.min_node_id);
+            SnarlDistanceIndex::temp_record_ref_t node_index = make_pair(SnarlDistanceIndex::TEMP_NODE, node_id);
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(node_index);
 
             temp_node_record.reversed_in_parent = false;
 
@@ -199,20 +202,21 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                     for (nid_t next_id : reachable_nodes) {
                         //For each node that this is connected to, check if we've already seen it and if we have, then
                         //union this chain and that node's chain
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.temp_node_records[next_id-temp_index.min_node_id];
+                        SnarlDistanceIndex::temp_record_ref_t next_index = make_pair(SnarlDistanceIndex::TEMP_NODE, next_id);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.get_node(next_index);
                         if (node_record.node_id != 0) {
                             //If we've already seen this node, union it with the new one
                             //If we can see it by walking out from this top-level chain, then it must also be a
                             //top-level chain (or node pretending to be a chain)
                             size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
-                                           ? temp_index.temp_chain_records[node_record.parent.second].root_snarl_index
+                                           ? temp_index.get_chain(node_record.parent).root_snarl_index
                                            : node_record.root_snarl_index;
 #ifdef debug_distance_indexing
                             assert(other_i != std::numeric_limits<size_t>::max());
 #endif
                             root_snarl_component_uf.union_groups(other_i, temp_node_record.root_snarl_index);
 //#ifdef debug_distance_indexing
-//                            cerr << "        Union this trivial  with " << temp_index.temp_chain_records[node_record.parent.second].start_node_id << " " << temp_index.temp_chain_records[node_record.parent.second].end_node_id << endl;
+//                            cerr << "        Union this trivial  with " << temp_index.get_chain(node_record.parent).start_node_id << " " << temp_index.get_chain(node_record.parent).end_node_id << endl;
 //#endif
                         } else {
                             new_component = false;
@@ -226,7 +230,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
             } else {
                 //The last thing on the stack is the parent of this chain, which must be a snarl
                 temp_node_record.parent = stack.back();
-                auto& parent_snarl_record = temp_index.temp_snarl_records.at(temp_node_record.parent.second);
+                auto& parent_snarl_record = temp_index.get_snarl(temp_node_record.parent);
                 temp_node_record.rank_in_parent = parent_snarl_record.children.size() + 2;
                 parent_snarl_record.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
             }
@@ -282,20 +286,25 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                     for (nid_t next_id : reachable_nodes) {
                         //For each node that this is connected to, check if we've already seen it and if we have, then
                         //union this chain and that node's chain
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.temp_node_records[next_id-temp_index.min_node_id];
+                        SnarlDistanceIndex::temp_record_ref_t next_index = make_pair(SnarlDistanceIndex::TEMP_NODE, next_id);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.get_node(next_index);
                         if (node_record.node_id != 0) {
                             //If we've already seen this node, union it with the new one
                             //If we can see it by walking out from this top-level chain, then it must also be a
                             //top-level chain (or node pretending to be a chain)
                             size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
-                                           ? temp_index.temp_chain_records[node_record.parent.second].root_snarl_index
+                                           ? temp_index.get_chain(node_record.parent).root_snarl_index
                                            : node_record.root_snarl_index;
 #ifdef debug_distance_indexing
                             assert(other_i != std::numeric_limits<size_t>::max());
 #endif
                             root_snarl_component_uf.union_groups(other_i, temp_chain_record.root_snarl_index);
 #ifdef debug_distance_indexing
-                            cerr << "        Union this chain with " << temp_index.temp_chain_records[node_record.parent.second].start_node_id << " " << temp_index.temp_chain_records[node_record.parent.second].end_node_id << endl;
+                            if (node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN) {
+                                cerr << "        Union this chain with " << temp_index.get_chain(node_record.parent).start_node_id << " " << temp_index.get_chain(node_record.parent).end_node_id << endl;
+                            } else {
+                                cerr << "        Union this chain with root " << node_record.root_snarl_index << endl;
+                            }
 #endif
                         } else {
                             new_component = false;
@@ -310,7 +319,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
             } else {
                 //The last thing on the stack is the parent of this chain, which must be a snarl
                 temp_chain_record.parent = stack.back();
-                auto& parent_snarl_record = temp_index.temp_snarl_records.at(temp_chain_record.parent.second);
+                auto& parent_snarl_record = temp_index.get_snarl(temp_chain_record.parent);
                 temp_chain_record.rank_in_parent = parent_snarl_record.children.size() + 2;
                 parent_snarl_record.children.emplace_back(chain_index);
             }
@@ -347,13 +356,13 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
          * parent chain
          * Also create a node record
          */
-        pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index = stack.back();
+        SnarlDistanceIndex::temp_record_ref_t snarl_index = stack.back();
         stack.pop_back();
 #ifdef debug_distance_indexing
         assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
         assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
 #endif
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records[snarl_index.second];
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(snarl_index);
         nid_t node_id = graph->get_id(snarl_end_handle);
 
         //Record the end node in the snarl
@@ -362,12 +371,12 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
         temp_snarl_record.end_node_length = graph->get_length(snarl_end_handle);
         temp_snarl_record.node_count = temp_snarl_record.children.size();
         bool any_edges_in_snarl = false;
-        graph->follow_edges(graph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev), false, [&](const handle_t next_handle) {
+        graph->follow_edges(graph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev), false, [&](const handle_t& next_handle) {
             if (graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
                 any_edges_in_snarl = true;
             }
         });
-        graph->follow_edges(graph->get_handle(temp_snarl_record.end_node_id, !temp_snarl_record.end_node_rev), false, [&](const handle_t next_handle) {
+        graph->follow_edges(graph->get_handle(temp_snarl_record.end_node_id, !temp_snarl_record.end_node_rev), false, [&](const handle_t& next_handle) {
             if (graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
                 any_edges_in_snarl = true;
             }
@@ -377,53 +386,50 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
             //This is a trivial snarl
             temp_snarl_record.is_trivial = true;
 
+#ifdef debug_distance_indexing
+            cerr << "  Ending and forgetting trivial snarl " << temp_index.structure_start_end_as_string(snarl_index)
+                 << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
+#endif
+
             //Add the end node to the chain
 #ifdef debug_distance_indexing
             assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
 #endif
             temp_snarl_record.parent = stack.back();
-            auto& temp_chain = temp_index.temp_chain_records.at(stack.back().second);
+            auto& temp_chain = temp_index.get_chain(stack.back());
             temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
 
-            //Remove the snarl record
+            //Remove the snarl record.
+            //This invalidates snarl_index!!!
 #ifdef debug_distance_indexing
             assert(temp_index.temp_snarl_records.size() == snarl_index.second+1);
 #endif
             temp_index.temp_snarl_records.pop_back();
         } else {
             //This is the child of a chain
+            
+#ifdef debug_distance_indexing
+            cerr << "  Ending new snarl " << temp_index.structure_start_end_as_string(snarl_index)
+                 << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
+#endif
+
 #ifdef debug_distance_indexing
             assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
 #endif
             temp_snarl_record.parent = stack.back();
-            auto& temp_chain = temp_index.temp_chain_records.at(stack.back().second);
+            auto& temp_chain = temp_index.get_chain(stack.back());
             temp_chain.children.emplace_back(snarl_index);
             temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
 
         }
-        //Record the snarl as a child of its chain
-        //if (stack.empty()) {
-        //    assert(false);
-        //    //TODO: The snarl should always be the child of a chain
-        //    //If this was the last thing on the stack, then this was a root
-        //    //TODO: I'm not sure if this would get put into a chain or not
-        //    temp_snarl_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
-        //    temp_index.components.emplace_back(snarl_index);
-        //} 
 
         //Record the node itself. This gets done for the start of the chain, and ends of snarls
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records.at(node_id-temp_index.min_node_id);
+        SnarlDistanceIndex::temp_record_ref_t node_index = make_pair(SnarlDistanceIndex::TEMP_NODE, node_id);
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(node_index);
         temp_node_record.node_id = node_id;
         temp_node_record.node_length = graph->get_length(snarl_end_handle);
         temp_node_record.reversed_in_parent = graph->get_is_reverse(snarl_end_handle);
         temp_node_record.parent = stack.back();
-
-
-
-#ifdef debug_distance_indexing
-        cerr << "  Ending new snarl " << temp_index.structure_start_end_as_string(snarl_index)
-             << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
-#endif
     });
 
     /*
@@ -451,7 +457,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
         for (size_t chain_i : root_snarl_indexes) {
             //For each chain component of this root-level snarl
             if (temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_CHAIN){
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records[temp_index.root_snarl_components[chain_i].second];
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(temp_index.root_snarl_components[chain_i]);
                 temp_chain_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
                 temp_chain_record.rank_in_parent = temp_snarl_record.children.size();
                 temp_chain_record.reversed_in_parent = false;
@@ -461,7 +467,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 #ifdef debug_distance_indexing
                 assert(temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_NODE);
 #endif
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records[temp_index.root_snarl_components[chain_i].second - temp_index.min_node_id];
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(temp_index.root_snarl_components[chain_i]);
                 temp_node_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
                 temp_node_record.rank_in_parent = temp_snarl_record.children.size();
                 temp_node_record.reversed_in_parent = false;
@@ -484,11 +490,11 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
     cerr << "Filling in the distances in snarls" << endl;
 #endif
     for (int i = temp_index.temp_chain_records.size()-1 ; i >= 0 ; i--) {
-
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records[i];
+        SnarlDistanceIndex::temp_record_ref_t chain_index = make_pair(SnarlDistanceIndex::TEMP_CHAIN, i);
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(chain_index);
 #ifdef debug_distance_indexing
         assert(!temp_chain_record.is_trivial);
-        cerr << "  At "  << (temp_chain_record.is_trivial ? " trivial " : "") << " chain " << temp_index.structure_start_end_as_string(make_pair(SnarlDistanceIndex::TEMP_CHAIN, i)) << endl;
+        cerr << "  At "  << (temp_chain_record.is_trivial ? "trivial " : "") << "chain " << temp_index.structure_start_end_as_string(chain_index) << endl;
 #endif
 
         //Add the first values for the prefix sum and backwards loop vectors
@@ -505,7 +511,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
         size_t curr_component = 0; //which component of the chain are we in
         size_t last_node_length = 0;
         for (size_t chain_child_i = 0 ; chain_child_i < temp_chain_record.children.size() ; chain_child_i++ ){
-            const pair<SnarlDistanceIndex::temp_record_t, size_t>& chain_child_index = temp_chain_record.children[chain_child_i];
+            const SnarlDistanceIndex::temp_record_ref_t& chain_child_index = temp_chain_record.children[chain_child_i];
             //Go through each of the children in the chain, skipping nodes
             //The snarl may be trivial, in which case don't fill in the distances
 #ifdef debug_distance_indexing
@@ -518,7 +524,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                 //all distances, then add distances to the chain that this is in
                 //The parent chain will be the last thing in the stack
                 SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = 
-                        temp_index.temp_snarl_records.at(chain_child_index.second);
+                        temp_index.get_snarl(chain_child_index);
 
                 //Fill in this snarl's distances
                 populate_snarl_index(temp_index, chain_child_index, size_limit, only_top_level_chain_distances, graph);
@@ -566,13 +572,13 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                     //If this is a node and the last thing was also a node,
                     //then there was a trivial snarl 
                     SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
-                            temp_index.temp_node_records.at(chain_child_index.second-temp_index.min_node_id);
+                            temp_index.get_node(chain_child_index);
 
                     //Check if there is a loop in this node
                     //Snarls get counted as trivial if they contain no nodes but they might still have edges
                     size_t backward_loop = std::numeric_limits<size_t>::max();
 
-                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, !temp_node_record.reversed_in_parent), false, [&](const handle_t next_handle) {
+                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, !temp_node_record.reversed_in_parent), false, [&](const handle_t& next_handle) {
                         if (graph->get_id(next_handle) == temp_node_record.node_id) {
                             //If there is a loop going backwards (relative to the chain) back to the same node
                             backward_loop = 0;
@@ -590,7 +596,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                     }
                     temp_chain_record.chain_components.emplace_back(curr_component);
                 }
-                last_node_length = temp_index.temp_node_records.at(chain_child_index.second - temp_index.min_node_id).node_length;
+                last_node_length = temp_index.get_node(chain_child_index).node_length;
                 //And update the chains max length
                 temp_chain_record.max_length = SnarlDistanceIndex::sum(temp_chain_record.max_length,
                                                                        last_node_length);
@@ -626,7 +632,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 
             //If this is a looping chain, then check the first snarl for a loop
             if (temp_chain_record.children.at(1).first == SnarlDistanceIndex::TEMP_SNARL) {
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(temp_chain_record.children.at(1).second);
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(temp_chain_record.children.at(1));
                 temp_chain_record.forward_loops[temp_chain_record.forward_loops.size()-1] = temp_snarl_record.distance_start_start;
             } 
         }
@@ -637,7 +643,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
         for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
             auto& child = temp_chain_record.children.at(j);
             if (child.first == SnarlDistanceIndex::TEMP_SNARL){
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
                 if (temp_chain_record.chain_components.at(node_i) != temp_chain_record.chain_components.at(node_i+1) &&
                     temp_chain_record.chain_components.at(node_i+1) != 0){
                     //If this is a new chain component, then add the loop distance from the snarl
@@ -656,13 +662,13 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
             } else {
                 if (last_node_length != 0) {
                     SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
-                            temp_index.temp_node_records.at(child.second-temp_index.min_node_id);
+                            temp_index.get_node(child);
 
 
                     //Check if there is a loop in this node
                     //Snarls get counted as trivial if they contain no nodes but they might still have edges
                     size_t forward_loop = std::numeric_limits<size_t>::max();
-                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, temp_node_record.reversed_in_parent), false, [&](const handle_t next_handle) {
+                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, temp_node_record.reversed_in_parent), false, [&](const handle_t& next_handle) {
                         if (graph->get_id(next_handle) == temp_node_record.node_id) {
                             //If there is a loop going forward (relative to the chain) back to the same node
                             forward_loop = 0;
@@ -673,7 +679,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                                                  2*last_node_length));
                     node_i--;
                 }
-                last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
+                last_node_length = temp_index.get_node(child).node_length;
             }
         }
 
@@ -692,7 +698,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                 for (size_t i = 1 ; i < temp_chain_record.children.size()-1 ; i++ ) {
                     auto& child = temp_chain_record.children.at(i);
                     if (child.first == SnarlDistanceIndex::TEMP_SNARL) {
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
                         size_t new_loop_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
                                                       temp_chain_record.backward_loops.at(node_i-1), 
                                                       2*temp_snarl_record.min_length), 
@@ -715,7 +721,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                             temp_chain_record.backward_loops.at(node_i) = std::min(old_loop_distance,new_loop_distance);
                             node_i++;
                         }
-                        last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
+                        last_node_length = temp_index.get_node(child).node_length;
                     }
                 }
             }
@@ -729,7 +735,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                 for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
                     auto& child = temp_chain_record.children.at(j);
                     if (child.first == SnarlDistanceIndex::TEMP_SNARL){
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
                         size_t new_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
                                                 temp_chain_record.forward_loops.at(node_i+1), 
                                                 2* temp_snarl_record.min_length),
@@ -751,7 +757,7 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
                             temp_chain_record.forward_loops.at(node_i) = std::min(old_distance, new_distance);
                             node_i--;
                         }
-                        last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
+                        last_node_length = temp_index.get_node(child).node_length;
                     }
                 } 
             }
@@ -767,9 +773,9 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
 #ifdef debug_distance_indexing
     cerr << "Filling in the distances in root snarls and distances along chains" << endl;
 #endif
-    for (pair<SnarlDistanceIndex::temp_record_t, size_t>& component_index : temp_index.components) {
+    for (SnarlDistanceIndex::temp_record_ref_t& component_index : temp_index.components) {
         if (component_index.first == SnarlDistanceIndex::TEMP_SNARL) {
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(component_index.second);
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(component_index);
             populate_snarl_index(temp_index, component_index, size_limit, only_top_level_chain_distances, graph);
             temp_snarl_record.min_length = std::numeric_limits<size_t>::max();
         }
@@ -782,9 +788,44 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
     return temp_index;
 }
 
+/**
+ * Populate a row of the distance matrix.
+ * Also responsible for filling in min_length, distance_start_start, and distance_start_end on the TemporarySnarlRecord when a distance matrix is used.
+ */
+static void populate_distance_matrix_row(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const SnarlDistanceIndex::temp_record_ref_t& start_index, const HandleGraph* graph, size_t start_rank, bool is_internal_node, size_t size_limit); 
+
+/**
+ * Fills in required distance matrix rows for each child.
+ * Only called for non-oversized snarls (size_limit == 0 || node_count <= size_limit);
+ * oversized snarls go through populate_hub_labeling instead.
+ * - Normal snarl: all rows
+ * - size_limit == 0: no distances in index, so no rows
+ * - Top-level chain distances only: boundaries and tips only
+ */
+static void populate_distance_matrix_if_needed(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph, size_t size_limit, bool only_top_level_chain_distances); 
 
+/**
+ * Does three things:
+ * - Builds temp graph that hub labels will be built on
+ * - Builds the hub labels
+ * - Stores labels in temp_snarl_record
+ */
+static void populate_hub_labeling(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph);
+
+/**
+ * Determine if a snarl is regular or not.
+ *
+ * A regular snarl is a snarl that consists of only nodes or
+ * chains connected to the start and end, without any connections between
+ * multiple children, or any way to turn around. There may be an edge directly
+ * across.
+ *
+ * A simple snarl is always regular.
+ */
+static bool check_regularity(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph);
 
-/*Fill in the snarl index.
+/**
+ * Fill in the snarl index.
  * The index will already know its boundaries and everything knows their relationships in the
  * snarl tree. This needs to fill in the distances and the ranks of children in the snarl
  * The rank of a child is arbitrary, except that the start node will always be 0 and the end node
@@ -792,21 +833,18 @@ SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
  */
 void populate_snarl_index(
                 SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
-                pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index, size_t size_limit,
+                SnarlDistanceIndex::temp_record_ref_t snarl_index, size_t size_limit,
                 bool only_top_level_chain_distances, const HandleGraph* graph) {
 #ifdef debug_distance_indexing
     cerr << "Getting the distances for snarl " << temp_index.structure_start_end_as_string(snarl_index) << endl;
     assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
 #endif
-    SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(snarl_index.second);
+    SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(snarl_index);
     temp_snarl_record.is_simple=true;
 
-
-
-
     /*Helper function to find the ancestor of a node that is a child of this snarl */
-    auto get_ancestor_of_node = [&](pair<SnarlDistanceIndex::temp_record_t, size_t> curr_index,
-                                    pair<SnarlDistanceIndex::temp_record_t, size_t> ancestor_snarl_index) {
+    auto get_ancestor_of_node = [&](SnarlDistanceIndex::temp_record_ref_t curr_index,
+                                    SnarlDistanceIndex::temp_record_ref_t ancestor_snarl_index) {
 
         //This is a child that isn't a node, so it must be a chain
         if (curr_index.second == temp_snarl_record.start_node_id || 
@@ -815,11 +853,11 @@ void populate_snarl_index(
         }
 
         //Otherwise, walk up until we hit the current snarl
-        pair<SnarlDistanceIndex::temp_record_t, size_t> parent_index = temp_index.temp_node_records.at(curr_index.second-temp_index.min_node_id).parent;
+        SnarlDistanceIndex::temp_record_ref_t parent_index = temp_index.get_node(curr_index).parent;
         while (parent_index != ancestor_snarl_index) {
             curr_index=parent_index;
-            parent_index = parent_index.first == SnarlDistanceIndex::TEMP_SNARL ? temp_index.temp_snarl_records.at(parent_index.second).parent
-                                                            : temp_index.temp_chain_records.at(parent_index.second).parent;
+            parent_index = parent_index.first == SnarlDistanceIndex::TEMP_SNARL ? temp_index.get_snarl(parent_index).parent
+                                                            : temp_index.get_chain(parent_index).parent;
 #ifdef debug_distance_indexing
             assert(parent_index.first != SnarlDistanceIndex::TEMP_ROOT); 
 #endif
@@ -829,7 +867,7 @@ void populate_snarl_index(
     };
 
     // TODO: Copying the list
-    vector<pair<SnarlDistanceIndex::temp_record_t, size_t>> all_children = temp_snarl_record.children;
+    vector<SnarlDistanceIndex::temp_record_ref_t> all_children = temp_snarl_record.children;
 
     // Identify tips
     for (const auto& child : all_children) {
@@ -931,23 +969,23 @@ void populate_snarl_index(
                 // If the current child is the start bound, then get the start node pointing in 
                 current_graph_handle = topological_sort_start;
             } else {
-                pair<SnarlDistanceIndex::temp_record_t, size_t> current_index = all_children[current_child_index.first];
+                SnarlDistanceIndex::temp_record_ref_t current_index = all_children[current_child_index.first];
                 if (current_index.first == SnarlDistanceIndex::TEMP_NODE) {
                     // If the current child is a node, then get the node pointing in the correct direction
                     current_graph_handle = graph->get_handle(current_index.second, current_child_index.second);
                 } else if (current_child_index.second) {
                     // If the current child is a chain, and we're traversing the chain backwards
-                    current_graph_handle = graph->get_handle(temp_index.temp_chain_records[current_index.second].start_node_id, 
-                                                            !temp_index.temp_chain_records[current_index.second].start_node_rev);
+                    current_graph_handle = graph->get_handle(temp_index.get_chain(current_index).start_node_id,
+                                                            !temp_index.get_chain(current_index).start_node_rev);
                 } else {
                     // Otherwise, the current child is a chain and we're traversing the chain forwards
-                    current_graph_handle = graph->get_handle(temp_index.temp_chain_records[current_index.second].end_node_id, 
-                                                             temp_index.temp_chain_records[current_index.second].end_node_rev);
+                    current_graph_handle = graph->get_handle(temp_index.get_chain(current_index).end_node_id,
+                                                             temp_index.get_chain(current_index).end_node_rev);
                 }
             }
-                 
+
             // Try all edges leaving this side
-            graph->follow_edges(current_graph_handle, false, [&](const handle_t next_handle) {
+            graph->follow_edges(current_graph_handle, false, [&](const handle_t& next_handle) {
 #ifdef debug_distance_indexing
                 cerr << "Following forward edges from " << graph->get_id(current_graph_handle) 
                      << " to " << graph->get_id(next_handle) << endl;
@@ -958,19 +996,18 @@ void populate_snarl_index(
                     return true;
                 }
                 // Is next_handle a new source? Any unvisited predecessors?
-                pair<SnarlDistanceIndex::temp_record_t, size_t> next_index = 
-                    get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle)), snarl_index); 
-                bool next_is_node = next_index.first == SnarlDistanceIndex::TEMP_NODE;
-                size_t next_rank = next_is_node
-                            ? temp_index.temp_node_records.at(next_index.second - temp_index.min_node_id).rank_in_parent  
-                            : temp_index.temp_chain_records[next_index.second].rank_in_parent;
+                SnarlDistanceIndex::temp_record_ref_t next_index =
+                    get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle)), snarl_index);
+                size_t next_rank = next_index.first == SnarlDistanceIndex::TEMP_NODE
+                            ? temp_index.get_node(next_index).rank_in_parent
+                            : temp_index.get_chain(next_index).rank_in_parent;
                 // Subtract 2 to get the index from the rank
                 assert(next_rank >= 2);
                 next_rank -= 2;
                 assert(all_children[next_rank] == next_index);
-                bool next_rev = (next_is_node || temp_index.temp_chain_records[next_index.second].is_trivial)
-                            ? graph->get_is_reverse(next_handle) 
-                            : graph->get_id(next_handle) == temp_index.temp_chain_records[next_index.second].end_node_id;
+                bool next_rev = next_index.first == SnarlDistanceIndex::TEMP_NODE || temp_index.get_chain(next_index).is_trivial
+                            ? graph->get_is_reverse(next_handle)
+                            : graph->get_id(next_handle) == temp_index.get_chain(next_index).end_node_id;
                 if (visited_ranks.count(next_rank) != 0) {
                     // If this is a loop, abort
                     return true;
@@ -979,16 +1016,16 @@ void populate_snarl_index(
                 // Get the handle from the child represented by next_handle going the other way
                 handle_t reverse_handle = next_index.first == SnarlDistanceIndex::TEMP_NODE ? 
                             graph->get_handle(next_index.second, !next_rev) :
-                            (next_rev ? graph->get_handle(temp_index.temp_chain_records[next_index.second].end_node_id, 
-                                                          temp_index.temp_chain_records[next_index.second].end_node_rev)
-                                      : graph->get_handle(temp_index.temp_chain_records[next_index.second].start_node_id, 
-                                                         !temp_index.temp_chain_records[next_index.second].start_node_rev));
+                            (next_rev ? graph->get_handle(temp_index.get_chain(next_index).end_node_id,
+                                                          temp_index.get_chain(next_index).end_node_rev)
+                                      : graph->get_handle(temp_index.get_chain(next_index).start_node_id,
+                                                          !temp_index.get_chain(next_index).start_node_rev));
 
                 // Does this have no unseen incoming edges? Check as we go through incoming edges
                 bool is_source = true;
 
                 // Does this have no unseen incoming edges?
-                graph->follow_edges(reverse_handle, false, [&](const handle_t incoming_handle) {
+                graph->follow_edges(reverse_handle, false, [&](const handle_t& incoming_handle) {
 #ifdef debug_distance_indexing
                 cerr << "Getting backwards edge to " << graph->get_id(incoming_handle) << endl;
 #endif
@@ -998,16 +1035,15 @@ void populate_snarl_index(
                         return true;
                     }
                     // The index of the snarl's child that next_handle represents
-                    pair<SnarlDistanceIndex::temp_record_t, size_t> incoming_index = 
-                        get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(incoming_handle)), snarl_index); 
-                    bool incoming_is_node = incoming_index.first == SnarlDistanceIndex::TEMP_NODE;
-                    size_t incoming_rank = incoming_is_node
-                                ? temp_index.temp_node_records.at(incoming_index.second - temp_index.min_node_id).rank_in_parent  
-                                : temp_index.temp_chain_records[incoming_index.second].rank_in_parent;
-
-                    bool incoming_rev = incoming_is_node || temp_index.temp_chain_records[incoming_index.second].is_trivial 
-                                ? graph->get_is_reverse(incoming_handle) 
-                                : graph->get_id(incoming_handle) == temp_index.temp_chain_records[incoming_index.second].end_node_id;
+                    SnarlDistanceIndex::temp_record_ref_t incoming_index =
+                        get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(incoming_handle)), snarl_index);
+                    size_t incoming_rank = incoming_index.first == SnarlDistanceIndex::TEMP_NODE
+                                ? temp_index.get_node(incoming_index).rank_in_parent
+                                : temp_index.get_chain(incoming_index).rank_in_parent;
+
+                    bool incoming_rev = incoming_index.first == SnarlDistanceIndex::TEMP_NODE || temp_index.get_chain(incoming_index).is_trivial
+                                ? graph->get_is_reverse(incoming_handle)
+                                : graph->get_id(incoming_handle) == temp_index.get_chain(incoming_index).end_node_id;
                     // Subtract 2 to get the index from the rank
                     assert(incoming_rank >= 2);
                     incoming_rank -= 2;
@@ -1046,9 +1082,9 @@ void populate_snarl_index(
         for (size_t new_rank = 0 ; new_rank < topological_sort_order.size() ; new_rank++) {
             size_t old_rank = topological_sort_order[new_rank];
             if (all_children[old_rank].first == SnarlDistanceIndex::TEMP_NODE) {
-                temp_index.temp_node_records.at(all_children[old_rank].second-temp_index.min_node_id).rank_in_parent = new_rank+2;
+                temp_index.get_node(all_children[old_rank]).rank_in_parent = new_rank+2;
             } else {
-                temp_index.temp_chain_records[all_children[old_rank].second].rank_in_parent = new_rank+2;
+                temp_index.get_chain(all_children[old_rank]).rank_in_parent = new_rank+2;
             }
             const auto& old_is_tip = old_tippy_ranks.find(old_rank);
             if (old_is_tip != old_tippy_ranks.end()) {
@@ -1063,30 +1099,143 @@ void populate_snarl_index(
      */
 
 
-    if (size_limit != 0 && !only_top_level_chain_distances) { 
-        //If we are saving distances
-        //Reserve enough space to store all possible distances
-        temp_snarl_record.distances.reserve( temp_snarl_record.node_count > size_limit
-                ? temp_snarl_record.node_count * 2
-                : temp_snarl_record.node_count * temp_snarl_record.node_count);
+    // Add the start and end nodes to the list of children so that we include them in the traversal.
+    if (!temp_snarl_record.is_root_snarl) {
+        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.start_node_id);
+        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.end_node_id);
+    }
+
+    if (size_limit != 0 && temp_snarl_record.node_count > size_limit) {           
+      temp_index.most_oversized_snarl_size = std::max(temp_index.most_oversized_snarl_size, temp_snarl_record.node_count);
+      temp_index.use_oversized_snarls = true;
+      temp_snarl_record.is_simple = false;
+      populate_hub_labeling(temp_index, snarl_index, temp_snarl_record, all_children, graph);
+
+      if (!temp_snarl_record.is_root_snarl) {
+        // We need to query the hub labeling to fill in min_length,
+        // distance_start_start, and distance_end_end with the connectivity
+        // distances through the snarl, not including boundary nodes.
+        //
+        // Luckily we know the start is always child rank 0 forward, and the end
+        // is always child rank 1 forward.
+        //
+        // To exclude the boundary lengths we go from source port to non-source
+        // port.
+        //
+        // Root snarls have no boundary nodes (no rank 0/1), so these queries
+        // are meaningless for them. The root read path ignores these fields too.
+        temp_snarl_record.min_length = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(0, false, true), bgid(1, false, false)));
+        temp_snarl_record.distance_start_start = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(0, false, true), bgid(0, true, false)));
+        temp_snarl_record.distance_end_end = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(1, true, true), bgid(1, false, false)));
+      }
+      // TODO: Should this be here or should it be part of populate_hub_labeling()? Or its own function?
     } else {
+      if (size_limit == 0 || only_top_level_chain_distances) { 
         temp_snarl_record.include_distances = false;
+      }
+      // Also fills in min_length, distance_start_start, and distance_end_end, and sets is_simple to false if snarl isn't simple
+      populate_distance_matrix_if_needed(temp_index, snarl_index, temp_snarl_record, all_children, graph, size_limit, only_top_level_chain_distances);
     }
 
-    if (size_limit != 0 && temp_snarl_record.node_count > size_limit) {
-        temp_index.most_oversized_snarl_size = std::max(temp_index.most_oversized_snarl_size, temp_snarl_record.node_count);
-        temp_index.use_oversized_snarls = true;
+#ifdef debug_distance_indexing 
+    cerr << "snarl " << temp_index.structure_start_end_as_string(snarl_index) << " is_simple: " << temp_snarl_record.is_simple << endl;
+#endif
+
+    if (temp_snarl_record.is_simple) {
+        // If this is a simple snarl (one with only single nodes that connect to the start and end nodes), then
+        // we want to remember if the child nodes are reversed 
+        for (size_t i = 0 ; i < temp_snarl_record.node_count ; i++) {
+            //Get the index of the child
+            const SnarlDistanceIndex::temp_record_ref_t& child_index = temp_snarl_record.children[i];
+            //Which is a node
+#ifdef debug_distance_indexing
+            assert(child_index.first == SnarlDistanceIndex::TEMP_NODE);
+#endif
+
+            //And get the record
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record =
+                 temp_index.get_node(child_index);
+            size_t rank =temp_node_record.rank_in_parent;
+
+            
+
+            //Set the orientation of this node in the simple snarl
+            temp_node_record.reversed_in_parent = temp_node_record.distance_left_start == std::numeric_limits<size_t>::max();
+        }
+        
+    } 
+    
+    // Decide if the snarl is regular.
+    temp_snarl_record.is_regular = check_regularity(temp_index, snarl_index, temp_snarl_record, all_children, graph); 
+
+    //Now that the distances are filled in, predict the size of the snarl in the index
+    temp_index.max_index_size += temp_snarl_record.get_max_record_length();
+    if (temp_snarl_record.is_simple) {
+        temp_index.max_index_size -= (temp_snarl_record.children.size() * SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length());
     }
 
-    //Add the start and end nodes to the list of children so that we include them in the traversal 
-    if (!temp_snarl_record.is_root_snarl) {
-        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.start_node_id);
-        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.end_node_id);
+    // For simple snarl records, need  11 + 11 + number of bits for the number of children
+    temp_index.max_bits = std::max(temp_index.max_bits, 22 + SnarlDistanceIndex::bit_width(temp_snarl_record.children.size()));   
+} 
+// Builds the hub labeling for a snarl (used when node_count exceeds size_limit) and stores it, packed, in temp_snarl_record.hub_labels.
+void populate_hub_labeling(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph) {
+  CHOverlay ov = make_boost_graph(temp_index, snarl_index, temp_snarl_record, all_children, graph);
+  // ov is the overlay graph that the contraction hierarchy and the labels below are computed from.
+#ifdef debug_hub_label_build
+  // Dump CHOverlay graph to stderr for debugging
+  std::cerr << "=== CHOverlay Graph Dump ===" << std::endl;
+  std::cerr << ov << std::endl;
+  std::cerr << "=== End CHOverlay Dump ===" << std::endl;
+#endif
+  // Runs before create_labels(), which receives the same overlay; presumably the labels depend on the contraction - TODO confirm.
+  make_contraction_hierarchy(ov);
+  // One (initially empty) label list per overlay vertex in each set; labels_rev is presumably the reverse-direction set - TODO confirm.
+  vector<vector<HubRecord>> labels; labels.resize(num_vertices(ov));
+  vector<vector<HubRecord>> labels_rev; labels_rev.resize(num_vertices(ov)); 
+  create_labels(labels, labels_rev, ov);
+#ifdef debug_hub_label_storage
+  std::cerr << "Hub labels unpacked:" << std::endl;
+  for (const auto& node_list : {labels, labels_rev}) {  // NOTE(review): the braced list copies both label sets; debug-only
+    std::cerr << "Labels for all nodes:" << std::endl;
+    for (size_t i = 0; i < node_list.size(); i++) {
+        std::cerr << "\tLabels for rank " << i << ":" << std::endl;
+        for (const HubRecord& label : node_list[i]) {
+            std::cerr << "\t\tHub: " << label.hub << " Dist: " << label.dist << std::endl; 
+        }
     }
+  }
+#endif
+  // pack_labels() takes both label sets and returns the single value that is stored on the snarl record.
+  // Put labels in temp_snarl_record
+  temp_snarl_record.hub_labels = pack_labels(labels, labels_rev);
+#ifdef debug_hub_label_storage
+  std::cerr << "Hub labels as packed: ";
+  for (size_t i = 0; i < temp_snarl_record.hub_labels.size(); i++) {
+    if (i > 0) {
+        std::cerr << " | ";
+    }
+    std::cerr << temp_snarl_record.hub_labels[i];
+  }
+  std::cerr << std::endl;
+#endif
+}
 
-    while (!all_children.empty()) {
-        const pair<SnarlDistanceIndex::temp_record_t, size_t> start_index = std::move(all_children.back());
-        all_children.pop_back();
+void populate_distance_matrix_if_needed(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph, size_t size_limit, bool only_top_level_chain_distances) {
+    // This is only called for non-oversized snarls; oversized snarls go through populate_hub_labeling instead.
+#ifdef debug_distance_indexing
+    assert(size_limit == 0 || temp_snarl_record.node_count <= size_limit);
+#endif
+    if (size_limit != 0 && !only_top_level_chain_distances) {
+      //If we are saving distances
+      //Reserve enough space to store all possible distances. Since we are not oversized, node_count <= size_limit,
+      //so we always need the full node_count * node_count matrix.
+      temp_snarl_record.distances.reserve(temp_snarl_record.node_count * temp_snarl_record.node_count);
+    } else {
+      temp_snarl_record.include_distances = false;
+    }
+    for (auto it = all_children.rbegin(); it != all_children.rend(); ++it) {
+        // Visit all the children in reverse order
+        const SnarlDistanceIndex::temp_record_ref_t& start_index = *it;
 
         bool is_internal_node = false;
 
@@ -1094,21 +1243,44 @@ void populate_snarl_index(
              && start_index.second != temp_snarl_record.start_node_id 
              && start_index.second != temp_snarl_record.end_node_id) 
             || 
-            (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && temp_index.temp_chain_records.at(start_index.second).is_trivial)) {
-            // This is an internal node
+            (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && temp_index.get_chain(start_index).is_trivial)) {
+            // If this is an internal node
             is_internal_node = true;
-        } else if (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && !temp_index.temp_chain_records.at(start_index.second).is_trivial) {
+            nid_t node_id = start_index.first == SnarlDistanceIndex::TEMP_NODE ? start_index.second : temp_index.get_chain(start_index).start_node_id;
+            SnarlDistanceIndex::temp_record_ref_t node_index {SnarlDistanceIndex::TEMP_NODE, node_id};
+            size_t rank = start_index.first == SnarlDistanceIndex::TEMP_NODE ? temp_index.get_node(start_index).rank_in_parent
+                                                          : temp_index.get_chain(start_index).rank_in_parent;
+
+            bool has_edges = false;
+            graph->follow_edges(graph->get_handle(node_id, false), false, [&](const handle_t& next_handle) {
+                has_edges = true;
+            });
+            if (!has_edges) {
+                temp_index.get_node(node_index).is_tip = true;
+                temp_snarl_record.tippy_child_ranks.emplace(rank, false);
+                temp_snarl_record.is_simple=false; //It is a tip so this isn't simple snarl
+            }
+            has_edges = false;
+            graph->follow_edges(graph->get_handle(node_id, true), false, [&](const handle_t& next_handle) {
+                has_edges = true;
+            });
+            if (!has_edges) {
+                temp_index.get_node(node_index).is_tip = true;
+                temp_snarl_record.tippy_child_ranks.emplace(rank, true);
+                temp_snarl_record.is_simple=false; //It is a tip so this isn't simple snarl
+            }
+        } else if (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && !temp_index.get_chain(start_index).is_trivial) {
             // If this is an internal chain, then it isn't a simple snarl
             temp_snarl_record.is_simple=false;
         }
 
         bool start_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                      ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).is_tip 
-                      : temp_index.temp_chain_records.at(start_index.second).is_tip;
+                      ? temp_index.get_node(start_index).is_tip 
+                      : temp_index.get_chain(start_index).is_tip;
 
         size_t start_rank = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).rank_in_parent
-                : temp_index.temp_chain_records.at(start_index.second).rank_in_parent;
+                ? temp_index.get_node(start_index).rank_in_parent
+                : temp_index.get_chain(start_index).rank_in_parent;
 
 
         if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
@@ -1118,357 +1290,587 @@ void populate_snarl_index(
         } //TODO:
           //else {
           //  assert(start_rank != 0 && start_rank != 1);
-          //}
+          //} 
 
-        if ( (temp_snarl_record.node_count > size_limit || size_limit == 0 || only_top_level_chain_distances) && (temp_snarl_record.is_root_snarl || (!start_is_tip &&
-             start_rank != 0 && start_rank != 1))) {
-            //If we don't care about internal distances, and we also are not at a boundary or tip
+        //traversal start is not a tip or a boundary node
+        bool start_normal_child = (!start_is_tip && start_rank != 0 && start_rank != 1);
+ 
+        if ( (size_limit == 0 || only_top_level_chain_distances) && (temp_snarl_record.is_root_snarl || start_normal_child)) {
+            //We don't care about internal distances, and this child is a root child or a normal (non-boundary, non-tip) child
             //TODO: Why do we care about tips specifically?
             continue;
         }
+        //fill in all distances for a row
+        populate_distance_matrix_row(temp_index, snarl_index, temp_snarl_record, start_index, graph, start_rank, is_internal_node, size_limit);   
+    }                                                                                                                    
+}      
+      
+    
+                        
+void populate_distance_matrix_row(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const SnarlDistanceIndex::temp_record_ref_t& start_index, const HandleGraph* graph, size_t start_rank, bool is_internal_node, size_t size_limit) {
+    /*Helper function to find the ancestor of a node that is a child of this snarl */
+    auto get_ancestor_of_node = [&](SnarlDistanceIndex::temp_record_ref_t curr_index,
+                                    SnarlDistanceIndex::temp_record_ref_t ancestor_snarl_index) {
 
-        //Start from either direction for all nodes, but only going in for start and end
-        vector<bool> directions;
-        if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
-            directions.emplace_back(temp_snarl_record.start_node_rev);
-        } else if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.end_node_id){
-            directions.emplace_back(!temp_snarl_record.end_node_rev);
-        } else {
-            directions.emplace_back(true);
-            directions.emplace_back(false);
+        //If this is the snarl's start or end boundary node, it stands for itself, so return it unchanged
+        if (curr_index.second == temp_snarl_record.start_node_id || 
+            curr_index.second == temp_snarl_record.end_node_id) {
+            return curr_index;
         }
-        for (bool start_rev : directions) {
-            //Start a dijkstra traversal from start_index going in the direction indicated by start_rev
-            //Record the distances to each node (child of the snarl) found
-            size_t reachable_node_count = 0; //How many nodes can we reach from this node side?
 
+        //Otherwise, walk up until we hit the current snarl
+        SnarlDistanceIndex::temp_record_ref_t parent_index = temp_index.get_node(curr_index).parent;
+        while (parent_index != ancestor_snarl_index) {
+            curr_index=parent_index;
+            parent_index = parent_index.first == SnarlDistanceIndex::TEMP_SNARL ? temp_index.get_snarl(parent_index).parent
+                                                            : temp_index.get_chain(parent_index).parent;
 #ifdef debug_distance_indexing
-            cerr << "  Starting from child " << temp_index.structure_start_end_as_string(start_index)
-                 << " going " << (start_rev ? "rev" : "fd") << endl;
-#endif
-
-            //Define a NetgraphNode as the value for the priority queue:
-            // <distance, <<type of node, index into temp_node/chain_records>, direction>
-            using NetgraphNode = pair<size_t, pair<pair<SnarlDistanceIndex::temp_record_t, size_t>, bool>>; 
-            auto cmp = [] (const NetgraphNode a, const NetgraphNode b) {
-                return a.first > b.first;
-            };
-
-            //The priority queue of the next nodes to visit, ordered by the distance
-            std::priority_queue<NetgraphNode, vector<NetgraphNode>, decltype(cmp)> queue(cmp);
-            //The nodes we've already visited
-            unordered_set<pair<pair<SnarlDistanceIndex::temp_record_t, size_t>, bool>> visited_nodes;
-            visited_nodes.reserve(temp_snarl_record.node_count * 2);
-
-            //Start from the current start node
-            queue.push(make_pair(0, make_pair(start_index, start_rev)));
-
-            while (!queue.empty()) {
-
-                //Get the current node from the queue and pop it out of the queue
-                size_t current_distance = queue.top().first;
-                pair<SnarlDistanceIndex::temp_record_t, size_t> current_index = queue.top().second.first;
-                bool current_rev = queue.top().second.second;
-                if (visited_nodes.count(queue.top().second)) {
-                    queue.pop();
-                    continue;
-                }
-                visited_nodes.emplace(queue.top().second);
+            assert(parent_index.first != SnarlDistanceIndex::TEMP_ROOT); 
+#endif
+        }
+        
+        return curr_index;
+    }; 
+
+    //Start from either direction for all nodes, but only going in for start and end
+    vector<bool> directions;
+    if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
+        directions.emplace_back(temp_snarl_record.start_node_rev);
+    } else if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.end_node_id){
+        directions.emplace_back(!temp_snarl_record.end_node_rev);
+    } else {
+        directions.emplace_back(true);
+        directions.emplace_back(false);
+    }
+    for (bool start_rev : directions) {
+        //Start a dijkstra traversal from start_index going in the direction indicated by start_rev
+        //Record the distances to each node (child of the snarl) found
+        size_t reachable_node_count = 0; //How many nodes can we reach from this node side?
+
+#ifdef debug_distance_indexing
+        cerr << "  Starting from child " << temp_index.structure_start_end_as_string(start_index)
+             << " going " << (start_rev ? "rev" : "fd") << endl;
+#endif
+
+        //Define a NetgraphNode as the value for the priority queue:
+        // <distance, <<type of node, index into temp_node/chain_records>, direction>
+        using NetgraphNode = pair<size_t, pair<SnarlDistanceIndex::temp_record_ref_t, bool>>; 
+        auto cmp = [] (const NetgraphNode a, const NetgraphNode b) {
+            return a.first > b.first;
+        };
+
+        //The priority queue of the next nodes to visit, ordered by the distance
+        std::priority_queue<NetgraphNode, vector<NetgraphNode>, decltype(cmp)> queue(cmp);
+        //The nodes we've already visited
+        unordered_set<pair<SnarlDistanceIndex::temp_record_ref_t, bool>> visited_nodes;
+        visited_nodes.reserve(temp_snarl_record.node_count * 2);
+
+        //Start from the current start node
+        queue.push(make_pair(0, make_pair(start_index, start_rev)));
+
+        while (!queue.empty()) {
+
+            //Get the current node from the queue and pop it out of the queue
+            size_t current_distance = queue.top().first;
+            SnarlDistanceIndex::temp_record_ref_t current_index = queue.top().second.first;
+            bool current_rev = queue.top().second.second;
+            if (visited_nodes.count(queue.top().second)) {
                 queue.pop();
+                continue;
+            }
+            visited_nodes.emplace(queue.top().second);
+            queue.pop();
 
 
-                //The handle that we need to follow to get the next reachable nodes
-                //If the current node is a node, then its just the node. Otherwise, it's the 
-                //opposite side of the child chain
-                handle_t current_end_handle = current_index.first == SnarlDistanceIndex::TEMP_NODE ? 
-                        graph->get_handle(current_index.second, current_rev) :
-                        (current_rev ? graph->get_handle(temp_index.temp_chain_records[current_index.second].start_node_id, 
-                                                        !temp_index.temp_chain_records[current_index.second].start_node_rev) 
-                                  : graph->get_handle(temp_index.temp_chain_records[current_index.second].end_node_id, 
-                                                      temp_index.temp_chain_records[current_index.second].end_node_rev));
+            //The handle that we need to follow to get the next reachable nodes
+            //If the current node is a node, then its just the node. Otherwise, it's the 
+            //opposite side of the child chain
+            handle_t current_end_handle = current_index.first == SnarlDistanceIndex::TEMP_NODE ? 
+                    graph->get_handle(current_index.second, current_rev) :
+                    (current_rev ? graph->get_handle(temp_index.get_chain(current_index).start_node_id, 
+                                                    !temp_index.get_chain(current_index).start_node_rev) 
+                              : graph->get_handle(temp_index.get_chain(current_index).end_node_id, 
+                                                  temp_index.get_chain(current_index).end_node_rev));
 
 #ifdef debug_distance_indexing
-                        cerr << "    at child " << temp_index.structure_start_end_as_string(current_index) << " going "
-                             << (current_rev ? "rev" : "fd") << " at actual node " << graph->get_id(current_end_handle) 
-                             << (graph->get_is_reverse(current_end_handle) ? "rev" : "fd") << endl;
+                cerr << "    at child " << temp_index.structure_start_end_as_string(current_index) << " going "
+                     << (current_rev ? "rev" : "fd") << " at actual node " << graph->get_id(current_end_handle) 
+                     << (graph->get_is_reverse(current_end_handle) ? "rev" : "fd") << endl;
 #endif
-                graph->follow_edges(current_end_handle, false, [&](const handle_t next_handle) {
-                    if (graph->get_id(current_end_handle) == graph->get_id(next_handle)){
-                        //If this loops onto the same node side then this isn't a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if ((current_index.first == SnarlDistanceIndex::TEMP_NODE ? current_index.second 
-                                                                                     : (current_rev ? temp_index.temp_chain_records[current_index.second].end_node_id
-                                                                                                    : temp_index.temp_chain_records[current_index.second].start_node_id))
-                                    == graph->get_id(next_handle)){
-                        //If this loops to the other end of the chain then this isn't a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if (!temp_snarl_record.is_root_snarl && start_rank == 0 && 
-                               current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
-                        //If the starting point of this traversal was the start of the snarl, the current starting point is not the start node,
-                        //and we found another child, then this is not a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if (!temp_snarl_record.is_root_snarl && start_rank == 1 && 
-                               current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
-                        //If the starting point of this traversal was the end of the snarl, the current starting point is not the end node,
-                        //and we found another child, then this is not a simple snarl
+            graph->follow_edges(current_end_handle, false, [&](const handle_t& next_handle) {
+#ifdef debug_distance_indexing
+                cerr << "      see edge " << graph->get_id(current_end_handle) 
+                     << (graph->get_is_reverse(current_end_handle) ? "rev" : "fd")
+                     << " -> " << graph->get_id(next_handle) 
+                     << (graph->get_is_reverse(next_handle) ? "rev" : "fd") << endl;
+#endif
+
+                if (graph->get_id(current_end_handle) == graph->get_id(next_handle)) {
+                    //If this loops onto the same node side then this isn't a simple snarl
+                    temp_snarl_record.is_simple = false;
+                } else if ((current_index.first == SnarlDistanceIndex::TEMP_NODE ? current_index.second 
+                                                                                 : (current_rev ? temp_index.get_chain(current_index).end_node_id
+                                                                                                : temp_index.get_chain(current_index).start_node_id))
+                                == graph->get_id(next_handle)){
+                    //If this loops to the other end of the chain then this isn't a simple snarl
+                    temp_snarl_record.is_simple = false;
+                } else if (!temp_snarl_record.is_root_snarl && start_rank == 0 && 
+                           current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
+                    //If the starting point of this traversal was the start of the snarl, the current starting point is not the start node,
+                    //and we found another child, then this is not a simple snarl
+                    temp_snarl_record.is_simple = false;
+                } else if (!temp_snarl_record.is_root_snarl && start_rank == 1 && 
+                           current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
+                    //If the starting point of this traversal was the end of the snarl, the current starting point is not the end node,
+                    //and we found another child, then this is not a simple snarl
+                    temp_snarl_record.is_simple = false;
+                }
+
+                reachable_node_count++;
+
+                SnarlDistanceIndex::temp_record_ref_t next_node_index = make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle));
+
+                //At each of the nodes reachable from the current one, fill in the distance from the start
+                //node to the next node (current_distance). If this handle isn't leaving the snarl,
+                //add the next nodes along with the distance to the end of the next node
+                auto& node_record = temp_index.get_node(next_node_index);
+
+                //The index of the snarl's child that next_handle represents
+                SnarlDistanceIndex::temp_record_ref_t next_index = 
+                    get_ancestor_of_node(next_node_index, snarl_index); 
+
+                bool next_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+                          ? temp_index.get_node(start_index).is_tip 
+                          : temp_index.get_chain(start_index).is_tip;
+
+                //The rank and orientation of next in the snarl
+                size_t next_rank = next_index.first == SnarlDistanceIndex::TEMP_NODE 
+                        ? node_record.rank_in_parent
+                        : temp_index.get_chain(next_index).rank_in_parent;
+                if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.start_node_id) {
+#ifdef debug_distance_indexing
+                    std::cerr << "        edge arrived at start" << std::endl;
+#endif
+                    next_rank = 0;
+                } else if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.end_node_id) {
+#ifdef debug_distance_indexing
+                    std::cerr << "        edge arrived at end" << std::endl;
+#endif
+                    next_rank = 1;
+                } else {
+                    //If the next thing wasn't a boundary node and this was an internal node, then it isn't a simple snarl
+                    if (is_internal_node) {
                         temp_snarl_record.is_simple = false;
                     }
+                }//TODO: This won't be true of root snarls 
+                  //else {
+                  //  assert(next_rank != 0 && next_rank != 1);
+                  //}
+                bool next_rev = next_index.first == SnarlDistanceIndex::TEMP_NODE || temp_index.get_chain(next_index).is_trivial 
+                        ? graph->get_is_reverse(next_handle) 
+                        : graph->get_id(next_handle) == temp_index.get_chain(next_index).end_node_id;
+                
+                /**Record the distance **/
+                bool start_is_boundary = !temp_snarl_record.is_root_snarl && (start_rank == 0 || start_rank == 1);
+                bool next_is_boundary = !temp_snarl_record.is_root_snarl && (next_rank == 0 || next_rank == 1);
+
+                pair<size_t, bool> start = start_is_boundary 
+                    ? make_pair(start_rank, false) : make_pair(start_rank, !start_rev);
+                pair<size_t, bool> next = next_is_boundary 
+                    ? make_pair(next_rank, false) : make_pair(next_rank, next_rev);
+
+                if (size_limit == 0 && start_is_boundary && next_is_boundary) {
+                    // If not measuring distances, we need to use
+                    // distance_start_start and distance_end_end as
+                    // connectivity flags so we can still detect reversals
+                    // within chains and recognize regular snarls.
+                    if (start_rank == 0 && next_rank == 0) {
+                        temp_snarl_record.distance_start_start = 0;
+#ifdef debug_distance_indexing
+                        cerr << "        set loop indicator start start distance " << temp_snarl_record.distance_start_start << endl;
+#endif
+                    } else if (start_rank == 1 && next_rank == 1) {
+                        temp_snarl_record.distance_end_end = 0;
+#ifdef debug_distance_indexing
+                        cerr << "        set loop indicator end end distance " << temp_snarl_record.distance_start_start << endl;
+#endif
+                    }
+                } else if (size_limit != 0 &&
+                    (temp_snarl_record.node_count <= size_limit || start_is_boundary || next_is_boundary)) {
+                    //If the snarl is too big, then we don't record distances between internal nodes
+                    //If we are looking at all distances or we are looking at boundaries
+                    bool added_new_distance = false;
+
+                    //Set the distance
+                    if (start_is_boundary && next_is_boundary) {
+                        //If it is between bounds of the snarl, then the snarl stores it
+                        if (start_rank == 0 && next_rank == 0 && 
+                            temp_snarl_record.distance_start_start == std::numeric_limits<size_t>::max()) {
+                            temp_snarl_record.distance_start_start = current_distance;
+#ifdef debug_distance_indexing
+                            cerr << "        set start start distance " << temp_snarl_record.distance_start_start << endl;
+#endif
+                            added_new_distance = true;
+                        } else if (start_rank == 1 && next_rank == 1 && 
+                                   temp_snarl_record.distance_end_end == std::numeric_limits<size_t>::max()) {
+                            temp_snarl_record.distance_end_end = current_distance;
+#ifdef debug_distance_indexing
+                            cerr << "        set end end distance " << temp_snarl_record.distance_start_start << endl;
+#endif
+                            added_new_distance = true;
+                        } else if (((start_rank == 0 && next_rank == 1) || (start_rank == 1 && next_rank == 0))
+                                    && temp_snarl_record.min_length == std::numeric_limits<size_t>::max()){
+                            temp_snarl_record.min_length = current_distance;
+                            added_new_distance = true;
 
-                    reachable_node_count++;
-                    //At each of the nodes reachable from the current one, fill in the distance from the start
-                    //node to the next node (current_distance). If this handle isn't leaving the snarl,
-                    //add the next nodes along with the distance to the end of the next node
-                    auto& node_record = temp_index.temp_node_records.at(graph->get_id(next_handle)-temp_index.min_node_id);
-
-                    //The index of the snarl's child that next_handle represents
-                    pair<SnarlDistanceIndex::temp_record_t, size_t> next_index = 
-                        get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle)), snarl_index); 
-
-                    bool next_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                              ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).is_tip 
-                              : temp_index.temp_chain_records.at(start_index.second).is_tip;
-
-                    //The rank and orientation of next in the snarl
-                    size_t next_rank = next_index.first == SnarlDistanceIndex::TEMP_NODE 
-                            ? node_record.rank_in_parent
-                            : temp_index.temp_chain_records[next_index.second].rank_in_parent;
-                    if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.start_node_id) {
-                        next_rank = 0;
-                    } else if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.end_node_id) {
-                        next_rank = 1;
-                    } else {
-                        //If the next thing wasn't a boundary node and this was an internal node, then it isn't a simple snarl
-                        if (is_internal_node) {
-                            temp_snarl_record.is_simple = false;
                         }
-                    }//TODO: This won't be true of root snarls 
-                      //else {
-                      //  assert(next_rank != 0 && next_rank != 1);
-                      //}
-                    bool next_rev = next_index.first == SnarlDistanceIndex::TEMP_NODE || temp_index.temp_chain_records[next_index.second].is_trivial 
-                            ? graph->get_is_reverse(next_handle) 
-                            : graph->get_id(next_handle) == temp_index.temp_chain_records[next_index.second].end_node_id;
-                    
-                    /**Record the distance **/
-                    bool start_is_boundary = !temp_snarl_record.is_root_snarl && (start_rank == 0 || start_rank == 1);
-                    bool next_is_boundary = !temp_snarl_record.is_root_snarl && (next_rank == 0 || next_rank == 1);
-
-                    if (size_limit != 0 &&
-                        (temp_snarl_record.node_count <= size_limit || start_is_boundary || next_is_boundary)) {
-                        //If the snarl is too big, then we don't record distances between internal nodes
-                        //If we are looking at all distances or we are looking at boundaries
-                        bool added_new_distance = false;
-
-                        //Set the distance
-                        pair<size_t, bool> start = start_is_boundary 
-                            ? make_pair(start_rank, false) : make_pair(start_rank, !start_rev);
-                        pair<size_t, bool> next = next_is_boundary 
-                            ? make_pair(next_rank, false) : make_pair(next_rank, next_rev);
-                        if (start_is_boundary && next_is_boundary) {
-                            //If it is between bounds of the snarl, then the snarl stores it
-                            if (start_rank == 0 && next_rank == 0 && 
-                                temp_snarl_record.distance_start_start == std::numeric_limits<size_t>::max()) {
-                                temp_snarl_record.distance_start_start = current_distance;
-                                added_new_distance = true;
-                            } else if (start_rank == 1 && next_rank == 1 && 
-                                       temp_snarl_record.distance_end_end == std::numeric_limits<size_t>::max()) {
-                                temp_snarl_record.distance_end_end = current_distance;
-                                added_new_distance = true;
-                            } else if (((start_rank == 0 && next_rank == 1) || (start_rank == 1 && next_rank == 0))
-                                        && temp_snarl_record.min_length == std::numeric_limits<size_t>::max()){
-                                temp_snarl_record.min_length = current_distance;
+                    } else if (start_is_boundary){
+                        //If start is a boundary node
+                        if (next_index.first == SnarlDistanceIndex::TEMP_NODE) {
+                            //Next is a node
+                            auto& temp_node_record = temp_index.get_node(next_index);
+                            if (start_rank == 0 && !next_rev &&
+                                    temp_node_record.distance_left_start == std::numeric_limits<size_t>::max()) {
+                                temp_node_record.distance_left_start = current_distance;
                                 added_new_distance = true;
-
+                            } else if (start_rank == 0 && next_rev &&
+                                    temp_node_record.distance_right_start == std::numeric_limits<size_t>::max()) {
+                                temp_node_record.distance_right_start = current_distance;
+                                added_new_distance = true; 
+                            } else if (start_rank == 1 && !next_rev &&
+                                    temp_node_record.distance_left_end == std::numeric_limits<size_t>::max()) {
+                                temp_node_record.distance_left_end = current_distance;
+                                added_new_distance = true; 
+                            } else if (start_rank == 1 && next_rev &&
+                                    temp_node_record.distance_right_end == std::numeric_limits<size_t>::max()) {
+                                temp_node_record.distance_right_end = current_distance;
+                                added_new_distance = true; 
                             }
-                        } else if (start_is_boundary){
-                            //If start is a boundary node
-                            if (next_index.first == SnarlDistanceIndex::TEMP_NODE) {
-                                //Next is a node
-                                auto& temp_node_record = temp_index.temp_node_records.at(next_index.second-temp_index.min_node_id);
-                                if (start_rank == 0 && !next_rev &&
-                                        temp_node_record.distance_left_start == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_left_start = current_distance;
-                                    added_new_distance = true;
-                                } else if (start_rank == 0 && next_rev &&
-                                        temp_node_record.distance_right_start == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_right_start = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && !next_rev &&
-                                        temp_node_record.distance_left_end == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_left_end = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && next_rev &&
-                                        temp_node_record.distance_right_end == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_right_end = current_distance;
-                                    added_new_distance = true; 
-                                }
-                            }  else {
-                                //Next is a chain
-                                auto& temp_chain_record = temp_index.temp_chain_records.at(next_index.second);
-                                if (start_rank == 0 && !next_rev &&
-                                        temp_chain_record.distance_left_start == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_left_start = current_distance;
-                                    added_new_distance = true;
-                                } else if (start_rank == 0 && next_rev &&
-                                        temp_chain_record.distance_right_start == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_right_start = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && !next_rev &&
-                                        temp_chain_record.distance_left_end == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_left_end = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && next_rev &&
-                                        temp_chain_record.distance_right_end == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_right_end = current_distance;
-                                    added_new_distance = true; 
-                                }
+                        }  else {
+                            //Next is a chain
+                            auto& temp_chain_record = temp_index.get_chain(next_index);
+                            if (start_rank == 0 && !next_rev &&
+                                    temp_chain_record.distance_left_start == std::numeric_limits<size_t>::max()) {
+                                temp_chain_record.distance_left_start = current_distance;
+                                added_new_distance = true;
+                            } else if (start_rank == 0 && next_rev &&
+                                    temp_chain_record.distance_right_start == std::numeric_limits<size_t>::max()) {
+                                temp_chain_record.distance_right_start = current_distance;
+                                added_new_distance = true; 
+                            } else if (start_rank == 1 && !next_rev &&
+                                    temp_chain_record.distance_left_end == std::numeric_limits<size_t>::max()) {
+                                temp_chain_record.distance_left_end = current_distance;
+                                added_new_distance = true; 
+                            } else if (start_rank == 1 && next_rev &&
+                                    temp_chain_record.distance_right_end == std::numeric_limits<size_t>::max()) {
+                                temp_chain_record.distance_right_end = current_distance;
+                                added_new_distance = true; 
                             }
-                        } else if (!next_is_boundary && !temp_snarl_record.distances.count(make_pair(start, next))) {
-                            //Otherwise the snarl stores it in its distance
-                            //If the distance isn't from an internal node to a bound and we haven't stored the distance yet
+                        }
+                    } else if (!next_is_boundary && !temp_snarl_record.distances.count(make_pair(start, next))) {
+                        //Otherwise the snarl stores it in its distance
+                        //If the distance isn't from an internal node to a bound and we haven't stored the distance yet
 
-                            temp_snarl_record.distances[make_pair(start, next)] = current_distance;
-                            added_new_distance = true;
+                        temp_snarl_record.distances[make_pair(start, next)] = current_distance;
+                        added_new_distance = true;
 #ifdef debug_distance_indexing
-                            cerr << "           Adding distance between ranks " << start.first << " " << start.second << " and " << next.first << " " << next.second << ": " << current_distance << endl;
+                        cerr << "           Adding distance between ranks " << start.first << " " << start.second << " and " << next.first << " " << next.second << ": " << current_distance << endl;
 #endif
-                        }
-                        if (added_new_distance) {
-                            temp_snarl_record.max_distance = std::max(temp_snarl_record.max_distance, current_distance);
-                        }
                     }
+                    if (added_new_distance) {
+                        temp_snarl_record.max_distance = std::max(temp_snarl_record.max_distance, current_distance);
+                    }
+                }
 
 
-                    /**Add the next node to the priority queue**/
-
-                    if (visited_nodes.count(make_pair(next_index, next_rev)) == 0 &&
+                /**Add the next node to the priority queue**/
+
+                if (visited_nodes.count(make_pair(next_index, next_rev)) == 0 &&
+                    graph->get_id(next_handle) != temp_snarl_record.start_node_id &&
+                    graph->get_id(next_handle) != temp_snarl_record.end_node_id
+                    ) {
+                    //If this isn't leaving the snarl,
+                    //then add the next node to the queue, along with the distance to traverse it
+                    size_t next_node_length = next_index.first == SnarlDistanceIndex::TEMP_NODE ? graph->get_length(next_handle) :
+                                    temp_index.get_chain(next_index).min_length;
+                    if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN &&
+                        temp_index.get_chain(next_index).chain_components.back() != 0) {
+                        //If there are multiple components, then the chain is not start-end reachable so its length
+                        //is actually infinite
+                        next_node_length = std::numeric_limits<size_t>::max();
+                    }
+                    if (next_node_length != std::numeric_limits<size_t>::max()) {
+                        queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_length), 
+                                       make_pair(next_index, next_rev)));
+                    }
+                }
+                if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN) {
+                    size_t loop_distance = next_rev ? temp_index.get_chain(next_index).backward_loops.back() 
+                                                     : temp_index.get_chain(next_index).forward_loops.front();
+                    if (loop_distance != std::numeric_limits<size_t>::max() &&
+                        visited_nodes.count(make_pair(next_index, !next_rev)) == 0 &&
                         graph->get_id(next_handle) != temp_snarl_record.start_node_id &&
                         graph->get_id(next_handle) != temp_snarl_record.end_node_id
                         ) {
-                        //If this isn't leaving the snarl,
-                        //then add the next node to the queue, along with the distance to traverse it
-                        size_t next_node_length = next_index.first == SnarlDistanceIndex::TEMP_NODE ? graph->get_length(next_handle) :
-                                        temp_index.temp_chain_records[next_index.second].min_length;
-                        if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN &&
-                            temp_index.temp_chain_records[next_index.second].chain_components.back() != 0) {
-                            //If there are multiple components, then the chain is not start-end reachable so its length
-                            //is actually infinite
-                            next_node_length = std::numeric_limits<size_t>::max();
-                        }
-                        if (next_node_length != std::numeric_limits<size_t>::max()) {
-                            queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_length), 
-                                           make_pair(next_index, next_rev)));
-                        }
-                    }
-                    if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN) {
-                        size_t loop_distance = next_rev ? temp_index.temp_chain_records[next_index.second].backward_loops.back() 
-                                                         : temp_index.temp_chain_records[next_index.second].forward_loops.front();
-                        if (loop_distance != std::numeric_limits<size_t>::max() &&
-                            visited_nodes.count(make_pair(next_index, !next_rev)) == 0 &&
-                            graph->get_id(next_handle) != temp_snarl_record.start_node_id &&
-                            graph->get_id(next_handle) != temp_snarl_record.end_node_id
-                            ) {
-                            //If the next node can loop back on itself, then add the next node in the opposite direction
-                            size_t next_node_len = loop_distance + 2 * graph->get_length(next_handle);
-                            queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_len), 
-                                           make_pair(next_index, !next_rev)));
-                        }
+                        //If the next node can loop back on itself, then add the next node in the opposite direction
+                        size_t next_node_len = loop_distance + 2 * graph->get_length(next_handle);
+                        queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_len), 
+                                       make_pair(next_index, !next_rev)));
                     }
+                }
 #ifdef debug_distance_indexing
-                    cerr << "        reached child " << temp_index.structure_start_end_as_string(next_index) << "going " 
-                         << (next_rev ? "rev" : "fd") << " with distance " << current_distance << " for ranks " << start_rank << " " << next_rank << endl;
+                cerr << "        reached child " << temp_index.structure_start_end_as_string(next_index) << " going " 
+                     << (next_rev ? "rev" : "fd") << " with distance " << current_distance << " for ranks " << start_rank << " " << next_rank << endl;
 #endif
-                });
-            }
-            if (is_internal_node && reachable_node_count != 1) {
-                //If this is an internal node, then it must have only one edge for it to be a simple snarl
-                temp_snarl_record.is_simple = false;
-            }
+            });
+        }
+        if (is_internal_node && reachable_node_count != 1) {
+            //If this is an internal node, then it must have only one edge for it to be a simple snarl
+            temp_snarl_record.is_simple = false;
         }
+    }
 
-        /** Check the minimum length of the snarl passing through this node **/
-        if (start_rank != 0 && start_rank != 1) {
-
-            size_t child_max_length = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).node_length
-                : temp_index.temp_chain_records.at(start_index.second).max_length;
-            //The distance through the whole snarl traversing this node forwards
-            //(This might actually be traversing it backwards but it doesn't really matter)
-
-            size_t dist_start_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_left_start
-                : temp_index.temp_chain_records.at(start_index.second).distance_left_start;
-            size_t dist_end_right = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_right_end
-                : temp_index.temp_chain_records.at(start_index.second).distance_right_end;
-            size_t dist_start_right =  start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_right_start
-                : temp_index.temp_chain_records.at(start_index.second).distance_right_start;
-            size_t dist_end_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_left_end
-                : temp_index.temp_chain_records.at(start_index.second).distance_left_end;
-
-            size_t snarl_length_fd = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                    dist_start_left, dist_end_right),child_max_length);
-            //The same thing traversing this node backwards
-            size_t snarl_length_rev = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                    dist_start_right, dist_end_left), child_max_length);
-            //The max that isn't infinite
-            size_t max_length = 
-                snarl_length_rev == std::numeric_limits<size_t>::max() 
-                ? snarl_length_fd 
-                : (snarl_length_fd == std::numeric_limits<size_t>::max() 
-                        ? snarl_length_rev 
-                        : std::max(snarl_length_rev, snarl_length_fd));
-            if (max_length != std::numeric_limits<size_t>::max()) {
-                temp_snarl_record.max_length = std::max(temp_snarl_record.max_length, max_length);
-            }
-            if ( temp_snarl_record.is_simple && 
-                ! ((dist_start_left == 0 && dist_end_right == 0 && dist_end_left == std::numeric_limits<size_t>::max() && dist_start_right == std::numeric_limits<size_t>::max() ) || 
-                   (dist_start_left == std::numeric_limits<size_t>::max() && dist_end_right == std::numeric_limits<size_t>::max() && dist_end_left == 0 && dist_start_right == 0 ))){
-                //If the snarl is simple, double check that this node is actually simple: that it can only be traversed going
-                //across the nsarl
-                temp_snarl_record.is_simple = false;
-            }
+    /** Check the maximum length of the snarl passing through this node **/
+    if (start_rank != 0 && start_rank != 1) {
+
+        size_t child_max_length = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+            ? temp_index.get_node(start_index).node_length
+            : temp_index.get_chain(start_index).max_length;
+        //The distance through the whole snarl traversing this node forwards
+        //(This might actually be traversing it backwards but it doesn't really matter)
+
+        size_t dist_start_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+            ? temp_index.get_node(start_index).distance_left_start
+            : temp_index.get_chain(start_index).distance_left_start;
+        size_t dist_end_right = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+            ? temp_index.get_node(start_index).distance_right_end
+            : temp_index.get_chain(start_index).distance_right_end;
+        size_t dist_start_right =  start_index.first == SnarlDistanceIndex::TEMP_NODE 
+            ? temp_index.get_node(start_index).distance_right_start
+            : temp_index.get_chain(start_index).distance_right_start;
+        size_t dist_end_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+            ? temp_index.get_node(start_index).distance_left_end
+            : temp_index.get_chain(start_index).distance_left_end;
+
+        size_t snarl_length_fd = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                dist_start_left, dist_end_right),child_max_length);
+        //The same thing traversing this node backwards
+        size_t snarl_length_rev = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                dist_start_right, dist_end_left), child_max_length);
+        //The max that isn't infinite
+        size_t max_length = 
+            snarl_length_rev == std::numeric_limits<size_t>::max() 
+            ? snarl_length_fd 
+            : (snarl_length_fd == std::numeric_limits<size_t>::max() 
+                    ? snarl_length_rev 
+                    : std::max(snarl_length_rev, snarl_length_fd));
+        if (max_length != std::numeric_limits<size_t>::max()) {
+            temp_snarl_record.max_length = std::max(temp_snarl_record.max_length, max_length);
+        }
+        if ( temp_snarl_record.is_simple && 
+            ! ((dist_start_left == 0 && dist_end_right == 0 && dist_end_left == std::numeric_limits<size_t>::max() && dist_start_right == std::numeric_limits<size_t>::max() ) || 
+               (dist_start_left == std::numeric_limits<size_t>::max() && dist_end_right == std::numeric_limits<size_t>::max() && dist_end_left == 0 && dist_start_right == 0 ))){
+            //If the snarl is simple, double check that this node is actually simple: that it can only be traversed going
+            //across the snarl
+            temp_snarl_record.is_simple = false;
         }
     }
+}
 
+bool check_regularity(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph) {
+#ifdef debug_distance_indexing
+    std::cerr << "Check if snarl " << temp_snarl_record.start_node_id << " to " << temp_snarl_record.end_node_id << " with " << all_children.size() << " children is regular" << std::endl;
+#endif
 
-    //If this is a simple snarl (one with only single nodes that connect to the start and end nodes), then
-    // we want to remember if the child nodes are reversed 
+    if (temp_snarl_record.is_root_snarl) {
+        // Roots can't be regular.
+#ifdef debug_distance_indexing
+        std::cerr << "Snarl is not regular because it is a root snarl." << std::endl;
+#endif
+        return false;
+    }
     if (temp_snarl_record.is_simple) {
-        for (size_t i = 0 ; i < temp_snarl_record.node_count ; i++) {
-            //Get the index of the child
-            const pair<SnarlDistanceIndex::temp_record_t, size_t>& child_index = temp_snarl_record.children[i];
-            //Which is a node
+        // Simple snarls are always also regular.
 #ifdef debug_distance_indexing
-            assert(child_index.first == SnarlDistanceIndex::TEMP_NODE);
+        std::cerr << "Snarl is regular because it is simple." << std::endl;
 #endif
+        return true;
+    }
 
-            //And get the record
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record =
-                 temp_index.temp_node_records[child_index.second-temp_index.min_node_id];
-            size_t rank =temp_node_record.rank_in_parent;
+    // Get the snarl boundary nodes, facing out
+    handle_t start_out = graph->get_handle(temp_snarl_record.start_node_id, !temp_snarl_record.start_node_rev);
+    handle_t end_out = graph->get_handle(temp_snarl_record.end_node_id, temp_snarl_record.end_node_rev);
+
+    // Define accessors to get bounding graph handles for children, facing out.
+    auto child_start_out = [&](const SnarlDistanceIndex::temp_record_ref_t& child_index) {
+        return child_index.first == SnarlDistanceIndex::TEMP_NODE ? 
+            graph->get_handle(child_index.second, true) :
+            graph->get_handle(
+                temp_index.get_chain(child_index).start_node_id,
+                !temp_index.get_chain(child_index).start_node_rev
+            );
+    };
+    auto child_end_out = [&](const SnarlDistanceIndex::temp_record_ref_t& child_index) {
+        return child_index.first == SnarlDistanceIndex::TEMP_NODE ? 
+            graph->get_handle(child_index.second, false) :
+            graph->get_handle(
+                temp_index.get_chain(child_index).end_node_id,
+                temp_index.get_chain(child_index).end_node_rev
+            );
+    };
 
-            
+    for (const SnarlDistanceIndex::temp_record_ref_t& child_index : all_children) {
+        // We should only have nodes and chains as children
+        assert(child_index.first == SnarlDistanceIndex::TEMP_NODE
+            || child_index.first == SnarlDistanceIndex::TEMP_CHAIN);
+        if (child_index.first == SnarlDistanceIndex::TEMP_NODE
+            && (child_index.second == temp_snarl_record.start_node_id
+                || child_index.second == temp_snarl_record.end_node_id)) {
+            // Don't think about children for the snarl bounds now; we handle the bounds later.
+            continue;
+        }
 
-            //Set the orientation of this node in the simple snarl
-            temp_node_record.reversed_in_parent = temp_node_record.distance_left_start == std::numeric_limits<size_t>::max();
+        // Have we seen the snarl start?
+        bool saw_start = false;
+        // Have we seen the snarl end?
+        bool saw_end = false;
+        // Have we seen anything else, or a duplicate snarl boundary?
+        bool saw_other = false;
+
+        auto handle_destination = [&](const handle_t& next_handle) {
+#ifdef debug_distance_indexing
+            std::cerr << "\tConnects to " << graph->get_id(next_handle) << (graph->get_is_reverse(next_handle) ? "-" : "+") << std::endl;
+#endif
+
+            // Every edge off this side of the child must go to an outward-facing
+            // snarl boundary that hasn't been reached yet.
+            if (next_handle == start_out && !saw_start) {
+                saw_start = true;
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is a new connection to snarl start" << std::endl;
+#endif
+                return true;
+            } else if (next_handle == end_out && !saw_end) {
+                saw_end = true;
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is a new connection to snarl end" << std::endl;
+#endif
+                return true;
+            } else {
+                saw_other = true;
+                // We don't care if we have an edge going the right way because
+                // we found an edge going the wrong way.
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is an unwanted connection!" << std::endl;
+#endif
+                return false;
+            }
+        };
+        
+        // Check the edges off the child start
+        handle_t here = child_start_out(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Look right from " << graph->get_id(here) << (graph->get_is_reverse(here) ? "-" : "+") << std::endl;
+#endif
+        graph->follow_edges(here, false, handle_destination);
+
+        if (saw_other || !(saw_start != saw_end)) {
+            // We have an edge we shouldn't, or we don't connect to exactly one boundary.
+#ifdef debug_distance_indexing
+            std::cerr << "\tWe must not be regular" << std::endl;
+#endif
+            return false;
+        }
+        
+        // Check the edges off the child end
+        here = child_end_out(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Look right from " << graph->get_id(here) << (graph->get_is_reverse(here) ? "-" : "+") << std::endl;
+#endif
+        graph->follow_edges(here, false, handle_destination);
+
+        if (saw_other || !saw_start || !saw_end) {
+            // We have an edge we shouldn't, or we haven't reached both
+            // boundaries exactly once across the two ends of the child.
+#ifdef debug_distance_indexing
+            std::cerr << "\tWe must not be regular" << std::endl;
+#endif
+            return false;
+        }
+
+        if (child_index.first == SnarlDistanceIndex::TEMP_CHAIN) {
+            // If a child is a chain, check it for loops
+#ifdef debug_distance_indexing
+            std::cerr << "Check child chain for loops." << std::endl;
+#endif
+            const SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Forward loops:";
+            for (auto& l : temp_chain_record.forward_loops) {
+                std::cerr << " " << l;
+            }
+            std::cerr << std::endl;
+#endif
 
+            if (!temp_chain_record.forward_loops.empty() && temp_chain_record.forward_loops.front() != std::numeric_limits<size_t>::max()) {
+                // There's a forward loop in this child chain, so the snarl's not regular.
+#ifdef debug_distance_indexing
+                std::cerr << "We are not regular because there's a forward loop in this child chain." << std::endl;
+#endif
+                return false;
+            }
+
+#ifdef debug_distance_indexing
+            std::cerr << "Backward loops:";
+            for (auto& l : temp_chain_record.backward_loops) {
+                std::cerr << " " << l;
+            }
+            std::cerr << std::endl;
+#endif
+
+            if (!temp_chain_record.backward_loops.empty() && temp_chain_record.backward_loops.back() != std::numeric_limits<size_t>::max()) {
+                // There's a backward loop in this child chain, so the snarl's not regular.
+#ifdef debug_distance_indexing
+                std::cerr << "We are not regular because there's a backward loop in this child chain." << std::endl;
+#endif
+                return false;
+            }
         }
     }
 
-    //Now that the distances are filled in, predict the size of the snarl in the index
-    temp_index.max_index_size += temp_snarl_record.get_max_record_length();
-    if (temp_snarl_record.is_simple) {
-        temp_index.max_index_size -= (temp_snarl_record.children.size() * SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length());
+    // Now we know the children are fine; check for disallowed edges between
+    // the sentinels.
+
+    handle_t start_in = graph->flip(start_out);
+    if (graph->has_edge(start_in, start_out)) {
+#ifdef debug_distance_indexing
+        std::cerr << "We are not regular because we have a start-start loop." << std::endl;
+#endif
+        return false;
     }
 
-    // For simple snarl records, need  11 + 11 + number of bits for the number of children
-    temp_index.max_bits = std::max(temp_index.max_bits, 22 + SnarlDistanceIndex::bit_width(temp_snarl_record.children.size())); 
+    handle_t end_in = graph->flip(end_out);
+    if (graph->has_edge(end_in, end_out)) {
+#ifdef debug_distance_indexing
+        std::cerr << "We are not regular because we have an end-end loop." << std::endl;
+#endif
+        return false;
+    }
+
+    // If we don't have any disallowed edges, and we don't have any children
+    // without the exact right connectivity, we must be regular.
+
+    // We don't make sure we actually had any children.
+    
+#ifdef debug_distance_indexing
+    std::cerr << "We are a regular snarl." << std::endl;
+#endif
+
+    return true;
 }
 
 
+
 //Given an alignment to a graph and a range, find the set of nodes in the
 //graph for which the minimum distance from the position to any position
 //in the node is within the given distance range
@@ -1541,6 +1943,7 @@ cerr << "Start positon: "<< start_pos << endl;
     while (!distance_index.is_root(parent)) {
 #ifdef debug_subgraph
         cerr << "At child " << distance_index.net_handle_as_string(current_net) << " with distances " << current_distance_left << " " << current_distance_right << endl;
+        cerr << "Parent is " << distance_index.net_handle_as_string(parent) << " at offset " << SnarlDistanceIndex::get_record_offset(parent) << endl;
 #endif
 
         size_t max_parent_length = distance_index.maximum_length(parent);
@@ -1568,7 +1971,7 @@ cerr << "Start positon: "<< start_pos << endl;
             if (distance_index.is_snarl(parent)) {
                 //If this is the child of a snarl, then just traverse from the end of the node
 #ifdef debug_subgraph
-cerr << "Start search in parent " << distance_index.net_handle_as_string(parent);
+                cerr << "Start search in parent " << distance_index.net_handle_as_string(parent);
 #endif
                 if (current_distance_left != std::numeric_limits<size_t>::max() ){
                     //If we can go left
@@ -1615,7 +2018,7 @@ cerr << "Start search in parent " << distance_index.net_handle_as_string(parent)
 #endif
             } else {
 #ifdef debug_subgraph
-cerr << "Start search along parent chain " << distance_index.net_handle_as_string(parent);
+                cerr << "Start search along parent chain " << distance_index.net_handle_as_string(parent);
 #endif
                 //If this is the child of a chain, then traverse along the chain
                 if (current_distance_left != std::numeric_limits<size_t>::max()) {
@@ -1630,6 +2033,9 @@ cerr << "Start search along parent chain " << distance_index.net_handle_as_strin
             subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance, subgraph, search_start_nodes, seen_nodes, traversal_start); 
             return;
         } else if (distance_index.is_snarl(parent)){
+#ifdef debug_subgraph
+            cerr << "Parent is a snarl of handle type " << SnarlDistanceIndex::get_handle_type(parent) << " at offset " << SnarlDistanceIndex::get_record_offset(parent) << endl;
+#endif
             //TODO: This might be overkill. It prevents us from adding nodes that shouldn't be in the subgraph, but might be too slow
             //If we don't check the other direction, go through the loop and add everything whose distance is lower than the minimum
             //to seen_nodes
@@ -1661,6 +2067,9 @@ cerr << "Start search along parent chain " << distance_index.net_handle_as_strin
                 });
             }
         } else if (distance_index.is_chain(parent)) {
+#ifdef debug_subgraph
+            cerr << "Parent is a chain of handle type " << SnarlDistanceIndex::get_handle_type(parent) << " at offset " << SnarlDistanceIndex::get_record_offset(parent) << endl;
+#endif
             //TODO: This is probably also overkill - walk a chain if there is a viable loop
             size_t distance_loop_right = distance_index.distance_in_parent(parent, current_net, current_net, super_graph, max_distance);
             size_t distance_loop_left =  distance_index.distance_in_parent(parent, distance_index.flip(current_net), distance_index.flip(current_net), super_graph, max_distance);
diff --git a/src/snarl_distance_index.hpp b/src/snarl_distance_index.hpp
index 43268d4b23..e502b9aa12 100644
--- a/src/snarl_distance_index.hpp
+++ b/src/snarl_distance_index.hpp
@@ -2,6 +2,7 @@
 #define VG_SNARL_DISTANCE_HPP_INCLUDED
 
 #include <bdsg/snarl_distance_index.hpp>
+#include <bdsg/ch.hpp> 
 #include "snarls.hpp"
 #include <structures/union_find.hpp>
 #include "hash_map.hpp"
@@ -36,7 +37,7 @@ void fill_in_distance_index(SnarlDistanceIndex* distance_index, const HandleGrap
 
 /// Fill in the temporary snarl record with distances
 void populate_snarl_index(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, 
-    pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index, size_t size_limit, bool only_top_level_chain_distances, const HandleGraph* graph) ;
+    SnarlDistanceIndex::temp_record_ref_t snarl_index, size_t size_limit, bool only_top_level_chain_distances, const HandleGraph* graph) ;
 
 SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(const HandleGraph* graph, const HandleGraphSnarlFinder* snarl_finder, 
                                                                          size_t size_limit, bool only_top_level_chain_distances);
diff --git a/src/snarls.cpp b/src/snarls.cpp
index abaa507681..004021d6aa 100644
--- a/src/snarls.cpp
+++ b/src/snarls.cpp
@@ -10,6 +10,7 @@
 #include "snarls.hpp"
 #include "vg/io/json2pb.h"
 #include "subgraph_overlay.hpp"
+#include "crash.hpp"
 
 namespace vg {
 
@@ -20,7 +21,7 @@ SnarlManager SnarlFinder::find_snarls_parallel() {
 }
 
 HandleGraphSnarlFinder::HandleGraphSnarlFinder(const HandleGraph* graph) : graph(graph) {
-    // Nothing to do!
+    crash_unless(graph != nullptr); // A finder must be given a non-null graph.
 }
 
 SnarlManager HandleGraphSnarlFinder::find_snarls_unindexed() {
diff --git a/src/subcommand/bench_dist_query_main.cpp b/src/subcommand/bench_dist_query_main.cpp
new file mode 100644
index 0000000000..3303ad1146
--- /dev/null
+++ b/src/subcommand/bench_dist_query_main.cpp
@@ -0,0 +1,273 @@
+/** \file bench_dist_query_main.cpp
+ *
+ * Defines the "vg bench-dist-query" subcommand, which benchmarks distance query speed across multiple indexes.
+ */
+
+#include <omp.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#include <exception>
+#include <iostream>
+#include <fstream>
+#include <filesystem>
+#include <sstream>
+#include <chrono>
+
+#include "subcommand.hpp"
+
+#include "../benchmark.hpp"
+#include "../version.hpp"
+
+#include "../snarl_distance_index.hpp"
+#include "../integrated_snarl_finder.hpp"
+#include "../algorithms/gfa_to_handle.hpp"
+#include <vg/io/vpkg.hpp>
+#include <bdsg/hash_graph.hpp>
+#include "../gbwtgraph_helper.hpp"
+
+
+
+using namespace std;
+using namespace vg;
+using namespace vg::subcommand;
+
+/// Print the usage message for the bench-dist-query subcommand to standard error.
+/// argv[0] is shown as the program name.
+void help_bench_dist_query(char** argv) {
+    std::cerr << "usage: " << argv[0] << " bench-dist-query -g <graph.gbz> -d <index1.dist> [-d <index2.dist> ...] [options] >report.tsv" << endl
+         << "options:" << endl
+         << "  -g, --graph FILE         path to input GBZ graph file" << endl
+         << "  -d, --dist FILE          path to distance index file (repeatable)" << endl
+         << "  -q, --numQueries N       number of queries to run (default: 10000)" << endl
+         << "  -s, --save-queries FILE  save generated queries to FILE for reproducibility" << endl
+         << "  -Q, --load-queries FILE  load queries from FILE instead of generating new ones" << endl
+         << "  -p, --progress           show progress" << endl
+         << "  -h, --help               print this help message to stderr and exit" << endl;
+}
+
+
+/**
+ * Entry point for the "vg bench-dist-query" subcommand.
+ *
+ * Loads a GBZ graph, obtains a list of position-pair queries (read from the
+ * -Q file, or randomly generated with rand() otherwise), optionally saves
+ * them to the -s file, and then times minimum_distance() over all queries for
+ * each distance index given with -d.
+ *
+ * Writes a TSV report (index file name, average microseconds per query) to
+ * standard output; progress and diagnostics go to standard error.
+ *
+ * Returns 0 on success; exits with status 1 on bad usage or unusable input.
+ */
+int main_bench_dist_query(int argc, char** argv) {
+    bool show_progress = false;
+
+    string graph_path = "";
+    vector<string> dist_paths;
+    int num_queries = 10000;
+    string save_queries_path = "";
+    string load_queries_path = "";
+
+    int c;
+    optind = 2; // force optind past command positional argument
+    while (true) {
+        static struct option long_options[] =
+            {
+                {"progress",      no_argument,       0, 'p'},
+                {"help",          no_argument,       0, 'h'},
+                {"graph",         required_argument, 0, 'g'},
+                {"dist",          required_argument, 0, 'd'},
+                {"numQueries",    required_argument, 0, 'q'},
+                {"save-queries",  required_argument, 0, 's'},
+                {"load-queries",  required_argument, 0, 'Q'},
+                {0, 0, 0, 0}
+            };
+
+        int option_index = 0;
+        c = getopt_long(argc, argv, "g:d:q:s:Q:ph?",
+                        long_options, &option_index);
+
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'g':
+            graph_path = optarg;
+            break;
+        case 'd':
+            dist_paths.push_back(optarg);
+            break;
+        case 'q':
+            try {
+                num_queries = stoi(optarg);
+            } catch (const std::exception&) {
+                // Not a number, or out of range for int. Mark it invalid so
+                // the validation below reports it instead of terminating here.
+                num_queries = -1;
+            }
+            break;
+        case 's':
+            save_queries_path = optarg;
+            break;
+        case 'Q':
+            load_queries_path = optarg;
+            break;
+        case 'p':
+            show_progress = true;
+            break;
+        case 'h':
+        case '?':
+            help_bench_dist_query(argv);
+            exit(1);
+            break;
+        default:
+            abort();
+        }
+    }
+
+    if (graph_path.empty()) {
+        cerr << "error: a GBZ graph file is required (-g)" << endl;
+        help_bench_dist_query(argv);
+        exit(1);
+    }
+
+    if (dist_paths.empty()) {
+        cerr << "error: at least one distance index file is required (-d)" << endl;
+        help_bench_dist_query(argv);
+        exit(1);
+    }
+
+    if (num_queries < 0) {
+        // A negative count would wrap around to a huge size in resize().
+        cerr << "error: number of queries (-q) must be a non-negative integer" << endl;
+        exit(1);
+    }
+
+    // Load GBZ graph
+    if (show_progress) {
+        cerr << "Loading GBZ graph from " << graph_path << "..." << endl;
+    }
+    gbwtgraph::GBZ gbz;
+    load_gbz(gbz, graph_path, show_progress);
+    const HandleGraph& graph = gbz.graph;
+    cerr << "Loaded graph with " << graph.get_node_count() << " nodes" << endl;
+
+    using QueryEntry = pair<tuple<nid_t, bool, size_t>, tuple<nid_t, bool, size_t>>;
+    vector<QueryEntry> queries;
+
+    if (!load_queries_path.empty()) {
+        if (show_progress) {
+            cerr << "Loading queries from " << load_queries_path << "..." << endl;
+        }
+        ifstream qf(load_queries_path);
+        if (!qf) {
+            cerr << "error: cannot open query file: " << load_queries_path << endl;
+            exit(1);
+        }
+        string line;
+        size_t line_number = 0;
+        while (getline(qf, line)) {
+            ++line_number;
+            if (line.empty()) continue;
+            istringstream iss(line);
+            nid_t id1, id2; int rev1, rev2; size_t off1, off2;
+            if (!(iss >> id1 >> rev1 >> off1 >> id2 >> rev2 >> off2)) {
+                // Using partially-parsed fields would read uninitialized values.
+                cerr << "error: malformed query on line " << line_number
+                     << " of " << load_queries_path << endl;
+                exit(1);
+            }
+            QueryEntry q;
+            q.first  = make_tuple(id1, (bool)rev1, off1);
+            q.second = make_tuple(id2, (bool)rev2, off2);
+            queries.push_back(q);
+        }
+        cerr << "Loaded " << queries.size() << " queries from " << load_queries_path << endl;
+    } else {
+        if (show_progress) {
+            cerr << "Generating " << num_queries << " queries..." << endl;
+        }
+
+        // Collect all node IDs to sample from
+        vector<nid_t> all_node_ids;
+        graph.for_each_handle([&](handle_t h) {
+            all_node_ids.push_back(graph.get_id(h));
+        });
+        if (num_queries > 0 && all_node_ids.empty()) {
+            // rand() % 0 below would be undefined behavior.
+            cerr << "error: cannot generate queries for a graph with no nodes" << endl;
+            exit(1);
+        }
+
+        queries.resize(num_queries);
+        for (auto& query : queries) {
+            nid_t node1 = all_node_ids[rand() % all_node_ids.size()];
+            nid_t node2 = all_node_ids[rand() % all_node_ids.size()];
+            size_t len1 = graph.get_length(graph.get_handle(node1));
+            size_t len2 = graph.get_length(graph.get_handle(node2));
+            query.first  = make_tuple(node1, rand() % 2 == 1, len1 > 0 ? rand() % len1 : 0);
+            query.second = make_tuple(node2, rand() % 2 == 1, len2 > 0 ? rand() % len2 : 0);
+        }
+        cerr << "Generated " << queries.size() << " queries" << endl;
+    }
+
+    if (!save_queries_path.empty()) {
+        ofstream qf(save_queries_path);
+        if (!qf) {
+            cerr << "error: cannot open save file: " << save_queries_path << endl;
+            exit(1);
+        }
+        for (const auto& query : queries) {
+            const auto& [id1, rev1, off1] = query.first;
+            const auto& [id2, rev2, off2] = query.second;
+            qf << id1 << "\t" << (int)rev1 << "\t" << off1 << "\t"
+               << id2 << "\t" << (int)rev2 << "\t" << off2 << "\n";
+        }
+        cerr << "Saved " << queries.size() << " queries to " << save_queries_path << endl;
+    }
+
+    // Output header
+    cout << "dist_index\tavg_query_us" << endl;
+
+    // Benchmark each distance index
+    for (const auto& dist_path : dist_paths) {
+        if (show_progress) {
+            cerr << "Loading distance index from " << dist_path << "..." << endl;
+        }
+        SnarlDistanceIndex distance_index;
+        distance_index.deserialize(dist_path);
+        cerr << "Loaded distance index from " << dist_path << endl;
+
+        // Pull the whole index into the OS page cache so timings reflect
+        // unavoidable query cost, not avoidable first-touch I/O
+        distance_index.preload(true);
+
+        // Time all queries
+        auto start = chrono::high_resolution_clock::now();
+        for (auto& query : queries) {
+            auto& [node1, node1_rev, node1_offset] = query.first;
+            auto& [node2, node2_rev, node2_offset] = query.second;
+            distance_index.minimum_distance(
+                node1, node1_rev, node1_offset,
+                node2, node2_rev, node2_offset,
+                false,
+                nullptr
+            );
+        }
+        auto end = chrono::high_resolution_clock::now();
+
+        double total_us = chrono::duration<double, micro>(end - start).count();
+        // With no queries there is nothing to average; report 0 rather than dividing by zero.
+        double avg_us = queries.empty() ? 0.0 : total_us / queries.size();
+
+        filesystem::path dist_fs_path(dist_path);
+        cout << dist_fs_path.filename().string() << "\t" << avg_us << endl;
+        cerr << dist_path << ": avg query time = " << avg_us << " us" << endl;
+    }
+
+    return 0;
+}
+
+// Register subcommand
+static Subcommand vg_bench_dist_query("bench-dist-query", "benchmark distance query speed across multiple indexes", DEVELOPMENT, main_bench_dist_query);
diff --git a/src/subcommand/call_main.cpp b/src/subcommand/call_main.cpp
index 70f83bc17e..6388981cf2 100644
--- a/src/subcommand/call_main.cpp
+++ b/src/subcommand/call_main.cpp
@@ -801,7 +801,7 @@ int main_call(int argc, char** argv) {
 
     unique_ptr<AlignmentEmitter> alignment_emitter;
     if (gaf_output) {
-        alignment_emitter = vg::io::get_non_hts_alignment_emitter("-", "GAF", {}, get_thread_count(), graph);
+        alignment_emitter = vg::io::get_non_hts_alignment_emitter("-", "GAF", {}, vg::get_thread_count(), graph);
         // TODO: There should be a general function for emitting headers. See giraffe_main.cpp.
         io::GafAlignmentEmitter* gaf_emitter = dynamic_cast<io::GafAlignmentEmitter*>(alignment_emitter.get());
         if (gbz_graph.get() != nullptr && gaf_emitter != nullptr) {
diff --git a/src/subcommand/gampcompare_main.cpp b/src/subcommand/gampcompare_main.cpp
index 01a5d59717..96bcd85ec0 100644
--- a/src/subcommand/gampcompare_main.cpp
+++ b/src/subcommand/gampcompare_main.cpp
@@ -215,8 +215,13 @@ int main_gampcompare(int argc, char** argv) {
                         for (size_t j = 0; j < path_mapped_positions.size(); ++j) {
                             if (path_true_positions[i].second == path_mapped_positions[j].second) {
                                 // there is a pair of positions on the same strand of the same path
-                                abs_dist = min<int64_t>(abs_dist,
-                                    std::abs(static_cast<int64_t>(path_true_positions[i].first) - static_cast<int64_t>(path_mapped_positions[j].first)));
+                                abs_dist = min<int64_t>(
+                                    abs_dist,
+                                    std::abs(
+                                        static_cast<int64_t>(path_true_positions[i].first) - 
+                                        static_cast<int64_t>(path_mapped_positions[j].first)
+                                    )
+                                );
                             }
                         }
                     }
diff --git a/src/subcommand/haplotypes_main.cpp b/src/subcommand/haplotypes_main.cpp
index f0fa15c20f..4bffd2b852 100644
--- a/src/subcommand/haplotypes_main.cpp
+++ b/src/subcommand/haplotypes_main.cpp
@@ -965,7 +965,7 @@ void validate_error_sequence(const Logger& logger, size_t chain_id, size_t subch
 }
 
 std::string validate_unary_path(const HandleGraph& graph, handle_t from, handle_t to) {
-    hash_set<handle_t> visited;
+    vg::hash_set<handle_t> visited;
     handle_t curr = from;
     while (curr != to) {
         if (visited.find(curr) != visited.end()) {
@@ -989,7 +989,7 @@ std::string validate_unary_path(const HandleGraph& graph, handle_t from, handle_
 // Returns true if the path from (start, offset) reaches the end without revisiting start or leaving the subchain.
 // The path may continue in subsequent fragments.
 bool trace_path(
-    const gbwt::GBWT& index, const gbwt::FragmentMap& fragment_map, const hash_set<nid_t>& subchain_nodes,
+    const gbwt::GBWT& index, const gbwt::FragmentMap& fragment_map, const vg::hash_set<nid_t>& subchain_nodes,
     gbwt::size_type sequence_id, gbwt::node_type start, gbwt::size_type offset, gbwt::node_type end
 ) {
     gbwt::edge_type pos(start, offset);
@@ -1132,8 +1132,8 @@ void validate_chain(const Logger& logger,
         // Sequences: normal subchains.
         if (subchain.type == Haplotypes::Subchain::normal) {
             std::vector<gbwt::size_type> da = r_index.decompressDA(subchain.start);
-            hash_set<nid_t> nodes = extract_subchain(graph, gbwtgraph::GBWTGraph::node_to_handle(subchain.start), gbwtgraph::GBWTGraph::node_to_handle(subchain.end));
-            hash_set<Haplotypes::sequence_type> selected;
+            vg::hash_set<nid_t> nodes = extract_subchain(graph, gbwtgraph::GBWTGraph::node_to_handle(subchain.start), gbwtgraph::GBWTGraph::node_to_handle(subchain.end));
+            vg::hash_set<Haplotypes::sequence_type> selected;
             for (size_t i = 0; i < da.size(); i++) {
                 if (trace_path(*(graph.index), fragment_map, nodes, da[i], subchain.start, i, subchain.end)) {
                     selected.insert(Haplotypes::sequence_type(da[i], i));
@@ -1159,7 +1159,7 @@ void validate_chain(const Logger& logger,
                 std::string message = expected_got(da.size(), subchain.sequences.size()) + " sequences (prefix / suffix)";
                 validate_error_subchain(logger, chain_id, subchain_id, message);
             }
-            hash_set<Haplotypes::sequence_type> truth;
+            vg::hash_set<Haplotypes::sequence_type> truth;
             for (size_t i = 0; i < da.size(); i++) {
                 truth.insert({ da[i], i });
             }
@@ -1180,7 +1180,7 @@ void validate_chain(const Logger& logger,
 
         // Kmers.
         if (subchain.type != Haplotypes::Subchain::full_haplotype) {
-            hash_set<Haplotypes::Subchain::kmer_type> all_kmers;
+            vg::hash_set<Haplotypes::Subchain::kmer_type> all_kmers;
             for (size_t i = 0; i < subchain.kmers.size(); i++) {
                 all_kmers.insert(subchain.kmers[i]);
             }
@@ -1188,14 +1188,14 @@ void validate_chain(const Logger& logger,
                 std::string message = expected_got(subchain.kmers.size(), all_kmers.size()) + " kmers";
                 validate_error_subchain(logger, chain_id, subchain_id, message);
             }
-            hash_map<Haplotypes::Subchain::kmer_type, size_t> used_kmers; // (kmer used in haplotypes, number of sequences that contain it)
-            hash_map<Haplotypes::Subchain::kmer_type, size_t> missing_kmers; // (kmer not used in haplotypes, number of sequences that contain it)
+            vg::hash_map<Haplotypes::Subchain::kmer_type, size_t> used_kmers; // (kmer used in haplotypes, number of sequences that contain it)
+            vg::hash_map<Haplotypes::Subchain::kmer_type, size_t> missing_kmers; // (kmer not used in haplotypes, number of sequences that contain it)
             for (size_t i = 0; i < subchain.sequences.size(); i++) {
                 std::vector<std::string> haplotype = get_haplotype(
                     graph, fragment_map,
                     subchain.sequences[i], subchain.start, subchain.end, minimizer_index.k()
                 );
-                hash_map<Haplotypes::Subchain::kmer_type, bool> unique_minimizers; // (kmer, used in the sequence)
+                vg::hash_map<Haplotypes::Subchain::kmer_type, bool> unique_minimizers; // (kmer, used in the sequence)
                 for (const std::string& sequence : haplotype) {
                     auto minimizers = minimizer_index.minimizers(sequence);
                     for (auto& minimizer : minimizers) {
@@ -1322,7 +1322,7 @@ void validate_haplotypes(const Logger& logger,
     if (verbosity >= HaplotypePartitioner::Verbosity::verbosity_detailed) {
         logger.info() << "Validating kmer specificity" << std::endl;
     }
-    hash_map<Haplotypes::Subchain::kmer_type, std::pair<size_t, size_t>> kmers;
+    vg::hash_map<Haplotypes::Subchain::kmer_type, std::pair<size_t, size_t>> kmers;
     size_t collisions = 0, total_kmers = 0;
     for (size_t chain_id = 0; chain_id < haplotypes.components(); chain_id++) {
         const Haplotypes::TopLevelChain& chain = haplotypes.chains[chain_id];
diff --git a/src/subcommand/inject_main.cpp b/src/subcommand/inject_main.cpp
index 268bbeeac0..de8ce07ed7 100644
--- a/src/subcommand/inject_main.cpp
+++ b/src/subcommand/inject_main.cpp
@@ -138,7 +138,7 @@ int main_inject(int argc, char** argv) {
         set_crash_context(aln.name());
         if (add_identity) {
             // Calculate & save identity statistic
-            aln.set_identity(identity(aln.path()));
+            aln.set_identity(vg::identity(aln.path()));
         }
         if (rescore) {
             // Rescore the alignment
diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp
index f2f69da695..755c45dcfd 100644
--- a/src/subcommand/minimizer_main.cpp
+++ b/src/subcommand/minimizer_main.cpp
@@ -94,7 +94,6 @@ int main_minimizer(int argc, char** argv) {
             logger.info() << "Loading SnarlDistanceIndex from " << config.distance_name << std::endl;
         }
         distance_index = vg::io::VPKG::load_one<SnarlDistanceIndex>(config.distance_name);
-        distance_index->preload(true);
     }
 
     ZipCodeCollection oversized_zipcodes;
diff --git a/src/subcommand/pack_main.cpp b/src/subcommand/pack_main.cpp
index 8d6d7155a9..10146cd2ff 100644
--- a/src/subcommand/pack_main.cpp
+++ b/src/subcommand/pack_main.cpp
@@ -200,7 +200,7 @@ int main_pack(int argc, char** argv) {
     // use some naive heuristics to come up with bin count and batch size based on thread count
     // more bins: finer grained parallelism at cost of more mutexes and allocations
     // bigger batch size: more robustness to sorted input at cost of less parallelism
-    size_t num_threads = get_thread_count();
+    size_t num_threads = vg::get_thread_count();
     size_t batch_size = Packer::estimate_batch_size(num_threads);
     size_t bin_count = Packer::estimate_bin_count(num_threads);
 
diff --git a/src/unittest/banded_global_aligner.cpp b/src/unittest/banded_global_aligner.cpp
index 045e9bfa97..6b5fb4b3c8 100644
--- a/src/unittest/banded_global_aligner.cpp
+++ b/src/unittest/banded_global_aligner.cpp
@@ -10,7 +10,7 @@
 #include "vg.hpp"
 #include "path.hpp"
 #include "banded_global_aligner.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "bdsg/hash_graph.hpp"
 #include "../algorithms/pad_band.hpp"
 
@@ -3515,10 +3515,9 @@ namespace vg {
             
             SECTION( "Banded global aligner does not produce empty edits when there is an insertion an empty node") {
                 string graph_json = R"({"edge": [{"to_end": true, "from_start": true, "to": 22, "from": 20}, {"to": 26, "from": 20}, {"to": 24, "from": 20}, {"to_end": true, "from_start": true, "to": 26, "from": 4}, {"to_end": true, "from_start": true, "to": 24, "from": 4}], "node": [{"sequence": "C", "id": 24}, {"sequence": "GAGA", "id": 20}, {"sequence": "T", "id": 26}, {"sequence": "GGAGTCT", "id": 4}, {"id": 22}]})";
-                
-                Graph graph;
-                json2pb(graph, graph_json.c_str(), graph_json.size());
-                VG vg_graph(graph);
+
+                bdsg::HashGraph vg_graph;
+                vg::io::json2graph(graph_json, &vg_graph);
                 
                 TestAligner aligner_source;
                 const Aligner& aligner = *aligner_source.get_regular_aligner();
diff --git a/src/unittest/cactus.cpp b/src/unittest/cactus.cpp
index 7447ee247d..5e518db4ef 100644
--- a/src/unittest/cactus.cpp
+++ b/src/unittest/cactus.cpp
@@ -5,8 +5,9 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../cactus.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 
 namespace vg {
@@ -14,9 +15,7 @@ namespace unittest {
 using namespace std;
 
 TEST_CASE("We can convert a two-tailed graph to Cactus", "[cactus]") {
-    
-    VG graph;
-    
+
     string graph_json = R"(
     {"node":[{"sequence":"GT","id":7575},
     {"sequence":"TGTTAACAGCACAACATTTA","id":7580},
@@ -25,20 +24,18 @@ TEST_CASE("We can convert a two-tailed graph to Cactus", "[cactus]") {
     "edge":[{"from":7575,"to":7580,"from_start":true},
     {"from":7575,"to":7576}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json.c_str(), graph_json.size());
-    graph.extend(g);
 
-    // Make sure we can make a Cactus graph and get something out.    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
+    // Make sure we can make a Cactus graph and get something out.
     auto cactusified = cactusify(graph);
     REQUIRE(cactusified.is_valid());
     
 }
 
 TEST_CASE("We can convert a hairpin graph to Cactus", "[cactus]") {
-    VG graph;
-    
+
     // Here's a graph where only the left side of node 2 is dangling, and the right side of node 1 has a self loop.
     string graph_json = R"(
     {"node": [{"sequence": "A", "id": 1},
@@ -46,12 +43,11 @@ TEST_CASE("We can convert a hairpin graph to Cactus", "[cactus]") {
     "edge": [{"from": 2, "to": 1},
     {"from": 1, "to": 1, "to_end": true}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json.c_str(), graph_json.size());
-    graph.extend(g);
 
-    // Make sure we can make a Cactus graph and get something out.    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
+    // Make sure we can make a Cactus graph and get something out.
     auto cactusified = cactusify(graph);
     REQUIRE(cactusified.is_valid());
 }
diff --git a/src/unittest/chunker.cpp b/src/unittest/chunker.cpp
index 24f7d3b645..3be2298c15 100644
--- a/src/unittest/chunker.cpp
+++ b/src/unittest/chunker.cpp
@@ -7,6 +7,8 @@
 #include "vg.hpp"
 #include "xg.hpp"
 #include "path.hpp"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace vg {
 namespace unittest {
@@ -83,13 +85,13 @@ TEST_CASE("basic graph chunking", "[chunk]") {
     
     )";
     
-    // Load it into Protobuf
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    
+    // Load the graph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Pass it over to XG
     xg::XG index;
-    index.from_path_handle_graph(VG(chunk));
+    index.from_path_handle_graph(graph);
 
     PathChunker chunker(&index);
 
diff --git a/src/unittest/copy_graph.cpp b/src/unittest/copy_graph.cpp
index 581b683130..4e7e878075 100644
--- a/src/unittest/copy_graph.cpp
+++ b/src/unittest/copy_graph.cpp
@@ -1,6 +1,7 @@
 #include "catch.hpp"
 #include "../handle.hpp"
 #include "../vg.hpp"
+#include "../io/json2graph.hpp"
 #include "xg.hpp"
 
 #include "bdsg/packed_graph.hpp"
@@ -53,14 +54,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(vg.get_node_count() == 1);
         }
@@ -72,14 +74,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(pg.get_node_count() == 1);
         }
@@ -91,14 +94,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(hg.get_node_count() == 1);
         }
@@ -120,19 +124,20 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(vg.get_node_count() == 4);
             REQUIRE(vg.edge_count() == 4);
             REQUIRE(vg.length() == 16);
-            
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with one reversing edge, xg to pg", "[handle][pg][xg]") {
             string graph_json = R"(
@@ -151,14 +156,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
 
@@ -168,14 +174,14 @@ namespace vg {
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 4);
-        
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with one reversing edge, xg to hg", "[handle][hg][xg]") {
             string graph_json = R"(
@@ -194,14 +200,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
             int length = 0;
@@ -210,14 +217,14 @@ namespace vg {
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 4);
-            
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with reversing edges and loops", "[handle][vg][xg]") {
             string graph_json = R"(
@@ -239,14 +246,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
@@ -274,26 +282,27 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
-            
+
             int length = 0;
             pg.for_each_handle([&](const handle_t& here) {
                 length += pg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
@@ -321,26 +330,27 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
-            
+
             int length = 0;
             hg.for_each_handle([&](const handle_t& here) {
                 length += hg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
@@ -382,16 +392,17 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_path_handle_graph(&xg, &vg);
 
-            
-            
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
@@ -444,37 +455,38 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_path_handle_graph(&xg, &pg);
-            
-            
-            
+
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
 
-            
+
             int length = 0;
             pg.for_each_handle([&](const handle_t& here) {
                 length += pg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 7);
-            
-            
+
+
             REQUIRE(pg.has_path("path1") == true);
             REQUIRE(pg.has_path("path2") == true);
             REQUIRE(pg.get_path_count() == 2);
@@ -521,37 +533,38 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_path_handle_graph(&xg, &hg);
-            
-            
-            
+
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
-            
-            
+
+
             int length = 0;
             hg.for_each_handle([&](const handle_t& here) {
                 length += hg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 7);
-            
-            
+
+
             REQUIRE(hg.has_path("path1") == true);
             REQUIRE(hg.has_path("path2") == true);
             REQUIRE(hg.get_path_count() == 2);
diff --git a/src/unittest/dijkstra.cpp b/src/unittest/dijkstra.cpp
index 2608567153..4e94414040 100644
--- a/src/unittest/dijkstra.cpp
+++ b/src/unittest/dijkstra.cpp
@@ -6,7 +6,7 @@
 #include <iostream>
 #include <string>
 #include "../handle.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../vg.hpp"
 #include "catch.hpp"
 
@@ -125,14 +125,12 @@ TEST_CASE("Dijkstra search handles early stopping correctly", "[dijkstra][algori
 TEST_CASE("Dijkstra search works on a particular problem graph", "[dijkstra][algorithms]") {
 
     string graph_json = R"(
-{"node":[{"sequence":"A","id":"2454530"},{"sequence":"AGTGCTGGAGAGGATGTGGAGAAATAGGAAC","id":"2454529"},{"sequence":"C","id":"2454532"},{"sequence":"TTTTACACTGTTGGTGGGACTGTAAA","id":"2454533"},{"sequence":"A","id":"2454527"},{"sequence":"C","id":"2454528"},{"sequence":"G","id":"2454531"},{"sequence":"C","id":"2454534"},{"sequence":"T","id":"2454535"},{"sequence":"GGGTAATAA","id":"2454526"},{"sequence":"TAGTTCAACCATTGTGGAAGACTGTGGCAATT","id":"2454536"}],"edge":[{"from":"2454530","to":"2454532"},{"from":"2454530","to":"2454533"},{"from":"2454529","to":"2454530"},{"from":"2454529","to":"2454531"},{"from":"2454532","to":"2454533"},{"from":"2454533","to":"2454534"},{"from":"2454533","to":"2454535"},{"from":"2454527","to":"2454529"},{"from":"2454528","to":"2454529"},{"from":"2454531","to":"2454532"},{"from":"2454531","to":"2454533"},{"from":"2454534","to":"2454536"},{"from":"2454535","to":"2454536"},{"from":"2454526","to":"2454527"},{"from":"2454526","to":"2454528"}],"path":[{"name":"21","mapping":[{"position":{"node_id":"2454526"},"edit":[{"from_length":9,"to_length":9}],"rank":"3049077"},{"position":{"node_id":"2454528"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049078"},{"position":{"node_id":"2454529"},"edit":[{"from_length":31,"to_length":31}],"rank":"3049079"},{"position":{"node_id":"2454531"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049080"},{"position":{"node_id":"2454532"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049081"},{"position":{"node_id":"2454533"},"edit":[{"from_length":26,"to_length":26}],"rank":"3049082"},{"position":{"node_id":"2454535"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049083"},{"position":{"node_id":"2454536"},"edit":[{"from_length":32,"to_length":32}],"rank":"3049084"}]}]}    
+{"node":[{"sequence":"A","id":"2454530"},{"sequence":"AGTGCTGGAGAGGATGTGGAGAAATAGGAAC","id":"2454529"},{"sequence":"C","id":"2454532"},{"sequence":"TTTTACACTGTTGGTGGGACTGTAAA","id":"2454533"},{"sequence":"A","id":"2454527"},{"sequence":"C","id":"2454528"},{"sequence":"G","id":"2454531"},{"sequence":"C","id":"2454534"},{"sequence":"T","id":"2454535"},{"sequence":"GGGTAATAA","id":"2454526"},{"sequence":"TAGTTCAACCATTGTGGAAGACTGTGGCAATT","id":"2454536"}],"edge":[{"from":"2454530","to":"2454532"},{"from":"2454530","to":"2454533"},{"from":"2454529","to":"2454530"},{"from":"2454529","to":"2454531"},{"from":"2454532","to":"2454533"},{"from":"2454533","to":"2454534"},{"from":"2454533","to":"2454535"},{"from":"2454527","to":"2454529"},{"from":"2454528","to":"2454529"},{"from":"2454531","to":"2454532"},{"from":"2454531","to":"2454533"},{"from":"2454534","to":"2454536"},{"from":"2454535","to":"2454536"},{"from":"2454526","to":"2454527"},{"from":"2454526","to":"2454528"}],"path":[{"name":"21","mapping":[{"position":{"node_id":"2454526"},"edit":[{"from_length":9,"to_length":9}],"rank":"3049077"},{"position":{"node_id":"2454528"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049078"},{"position":{"node_id":"2454529"},"edit":[{"from_length":31,"to_length":31}],"rank":"3049079"},{"position":{"node_id":"2454531"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049080"},{"position":{"node_id":"2454532"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049081"},{"position":{"node_id":"2454533"},"edit":[{"from_length":26,"to_length":26}],"rank":"3049082"},{"position":{"node_id":"2454535"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049083"},{"position":{"node_id":"2454536"},"edit":[{"from_length":32,"to_length":32}],"rank":"3049084"}]}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json);
-    
-    // Wrap the graph in a HandleGraph
-    VG graph(g);
+
+    // Load the graph into a bdsg::HashGraph (namespace-qualified, as in the other converted tests)
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Decide where to start
     handle_t start = graph.get_handle(2454536, true);
diff --git a/src/unittest/gbwt_extender.cpp b/src/unittest/gbwt_extender.cpp
index ec61517cfa..5d25bf956e 100644
--- a/src/unittest/gbwt_extender.cpp
+++ b/src/unittest/gbwt_extender.cpp
@@ -5,7 +5,7 @@
 
 #include "../gbwt_extender.hpp"
 #include "../gbwt_helper.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../utility.hpp"
 #include "../vg.hpp"
 
@@ -90,10 +90,9 @@ gbwt::GBWT build_gbwt_index() {
 
 // Build a GBWTGraph using the provided GBWT index.
 gbwtgraph::GBWTGraph build_gbwt_graph(const gbwt::GBWT& gbwt_index) {
-    Graph graph;
-    json2pb(graph, gapless_extender_graph.c_str(), gapless_extender_graph.size());
-    VG vg_graph(graph);
-    return gbwtgraph::GBWTGraph(gbwt_index, vg_graph, nullptr);
+    bdsg::HashGraph graph; // destination graph for the parsed fixture
+    vg::io::json2graph(gapless_extender_graph, &graph); // fill it from the gapless_extender_graph JSON string
+    return gbwtgraph::GBWTGraph(gbwt_index, graph, nullptr); // NOTE(review): third argument is left null; its meaning is not visible here
 }
 
 void same_position(const Position& pos, const Position& correct) {
diff --git a/src/unittest/genotypekit.cpp b/src/unittest/genotypekit.cpp
index af9bc2a4d8..b5d460c59a 100644
--- a/src/unittest/genotypekit.cpp
+++ b/src/unittest/genotypekit.cpp
@@ -10,6 +10,8 @@
 #include "../traversal_finder.hpp"
 #include "xg.hpp"
 #include "../haplotype_extracter.hpp"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace Catch {
 
@@ -62,10 +64,10 @@ namespace vg {
 namespace unittest {
 
 TEST_CASE("sites can be found with Cactus", "[genotype]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -90,7 +92,7 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -101,14 +103,13 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
+  // Note: Using VG here because the test uses VG-specific methods like get_node() and get_edge()
   VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -196,10 +197,10 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
 }
 
 TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integrated-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -224,7 +225,7 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -235,14 +236,13 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
+  // Note: Using VG here because the test uses VG-specific methods like get_node() and get_edge()
   VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  vg::io::json2graph(graph_json, &graph);
     
   // Make an IntegratedSnarlFinder
   unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -329,7 +329,7 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
 }
 
 TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back cycles along root path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -351,17 +351,15 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back c
             {"from": 3, "to": 5},
             {"from": 4, "to": 6},
             {"from": 5, "to": 6}
-            
+
         ]
     }
 
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -375,18 +373,16 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back c
 }
 
 TEST_CASE("IntegratedSnarlFinder works on an all bridge edge Y graph with specific numbering", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
     {"node":[{"id":"2","sequence":"G"},{"id":"3","sequence":"G"},{"id":"4","sequence":"G"},{"id":"5","sequence":"G"},{"id":"6","sequence":"G"},{"id":"11","sequence":"G"}],
-    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}    
+    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -403,18 +399,16 @@ TEST_CASE("IntegratedSnarlFinder works on an all bridge edge Y graph with specif
 }
 
 TEST_CASE("IntegratedSnarlFinder roots correctly an all bridge edge Y graph with winning longest path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
     {"node":[{"id":"2","sequence":"G"},{"id":"3","sequence":"G"},{"id":"4","sequence":"GG"},{"id":"5","sequence":"G"},{"id":"6","sequence":"G"},{"id":"11","sequence":"GG"}],
-    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}    
+    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -452,7 +446,7 @@ TEST_CASE("IntegratedSnarlFinder roots correctly an all bridge edge Y graph with
 }
 
 TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to-back cycles along root path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -482,17 +476,15 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to
             {"from": 32, "to": 5},
             {"from": 4, "to": 6},
             {"from": 5, "to": 6}
-            
+
         ]
     }
 
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -506,50 +498,48 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to
 }
 
 TEST_CASE("IntegratedSnarlFinder works on a complex bundle-y region with a nested snarl", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
         {"edge": [{"from": "129672", "to": "129673"},
-                  {"from": "129662", "to": "129663"}, 
-                  {"from": "129662", "to": "129664"}, 
-                  {"from": "129664", "to": "129665"}, 
-                  {"from": "129664", "to": "129666"}, 
-                  {"from": "129666", "to": "129668"}, 
-                  {"from": "129666", "to": "129669"}, 
-                  {"from": "129666", "to": "129667"}, 
-                  {"from": "129667", "to": "129668"}, 
-                  {"from": "129667", "to": "129669"}, 
-                  {"from": "129669", "to": "129670"}, 
-                  {"from": "129669", "to": "129673"}, 
-                  {"from": "129671", "to": "129672"}, 
-                  {"from": "129668", "to": "129670"}, 
-                  {"from": "129668", "to": "129673"}, 
-                  {"from": "129665", "to": "129668"}, 
-                  {"from": "129665", "to": "129669"}, 
-                  {"from": "129665", "to": "129667"}, 
-                  {"from": "129670", "to": "129671"}, 
-                  {"from": "129670", "to": "129672"}, 
-                  {"from": "129663", "to": "129665"}, 
-                  {"from": "129663", "to": "129666"}], 
-        "node": [{"id": "129672", "sequence": "AT"}, 
-                 {"id": "129662", "sequence": "CAGGTCAAACTGTGAT"}, 
-                 {"id": "129664", "sequence": "T"}, 
-                 {"id": "129666", "sequence": "T"}, 
-                 {"id": "129667", "sequence": "G"}, 
-                 {"id": "129669", "sequence": "G"}, 
-                 {"id": "129671", "sequence": "T"}, 
-                 {"id": "129668", "sequence": "A"}, 
-                 {"id": "129665", "sequence": "A"}, 
-                 {"id": "129670", "sequence": "A"}, 
-                 {"id": "129673", "sequence": "ATATATATATACTTATTGTAAAAATCTTTAGA"}, 
+                  {"from": "129662", "to": "129663"},
+                  {"from": "129662", "to": "129664"},
+                  {"from": "129664", "to": "129665"},
+                  {"from": "129664", "to": "129666"},
+                  {"from": "129666", "to": "129668"},
+                  {"from": "129666", "to": "129669"},
+                  {"from": "129666", "to": "129667"},
+                  {"from": "129667", "to": "129668"},
+                  {"from": "129667", "to": "129669"},
+                  {"from": "129669", "to": "129670"},
+                  {"from": "129669", "to": "129673"},
+                  {"from": "129671", "to": "129672"},
+                  {"from": "129668", "to": "129670"},
+                  {"from": "129668", "to": "129673"},
+                  {"from": "129665", "to": "129668"},
+                  {"from": "129665", "to": "129669"},
+                  {"from": "129665", "to": "129667"},
+                  {"from": "129670", "to": "129671"},
+                  {"from": "129670", "to": "129672"},
+                  {"from": "129663", "to": "129665"},
+                  {"from": "129663", "to": "129666"}],
+        "node": [{"id": "129672", "sequence": "AT"},
+                 {"id": "129662", "sequence": "CAGGTCAAACTGTGAT"},
+                 {"id": "129664", "sequence": "T"},
+                 {"id": "129666", "sequence": "T"},
+                 {"id": "129667", "sequence": "G"},
+                 {"id": "129669", "sequence": "G"},
+                 {"id": "129671", "sequence": "T"},
+                 {"id": "129668", "sequence": "A"},
+                 {"id": "129665", "sequence": "A"},
+                 {"id": "129670", "sequence": "A"},
+                 {"id": "129673", "sequence": "ATATATATATACTTATTGTAAAAATCTTTAGA"},
                  {"id": "129663", "sequence": "G"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -579,23 +569,21 @@ TEST_CASE("IntegratedSnarlFinder works on a complex bundle-y region with a neste
 }
 
 TEST_CASE("CactusSnarlFinder safely handles a single node graph", "[genotype][cactus-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "GATTACA"}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -607,15 +595,13 @@ TEST_CASE("CactusSnarlFinder safely handles a single node graph", "[genotype][ca
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a completely empty graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = "{}";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make a IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -625,7 +611,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a completely empty graph", "[gen
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -638,10 +624,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -651,7 +635,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype
 }
 
 TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node chains", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -673,10 +657,8 @@ TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node c
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     IntegratedSnarlFinder finder(graph);
@@ -736,7 +718,7 @@ TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node c
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at one end", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -757,10 +739,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at on
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -770,7 +750,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at on
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node connected component in a larger graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -787,10 +767,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node connected componen
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -813,7 +791,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node connected componen
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -828,10 +806,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -844,7 +820,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -866,10 +842,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[ge
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -882,7 +856,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[ge
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -903,10 +877,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie"
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -935,7 +907,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie"
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -956,10 +928,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer"
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -988,7 +958,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer"
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted node", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -1009,10 +979,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder that adds 10 bp to node 4's apparent length
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph, {{4, 10}}));
@@ -1041,7 +1009,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted
 }
 
 TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -1066,10 +1034,8 @@ TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[gen
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -1098,10 +1064,10 @@ TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[gen
 }
 
 TEST_CASE("CactusSnarlFinder throws an error instead of crashing when the graph has no edges", "[genotype][cactus-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -1115,14 +1081,12 @@ TEST_CASE("CactusSnarlFinder throws an error instead of crashing when the graph
             {"id": 9, "sequence": "A"}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -1183,7 +1147,7 @@ TEST_CASE("fixed priors can be assigned to genotypes", "[genotype]") {
 TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -1208,7 +1172,7 @@ TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -1219,14 +1183,12 @@ TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a site
   Snarl site;
@@ -1329,12 +1291,10 @@ TEST_CASE("CactusSnarlFinder can differentiate ultrabubbles from snarls", "[geno
         ]
         }
         )";
-        
+
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         CactusSnarlFinder cubs(graph);
@@ -1381,10 +1341,8 @@ TEST_CASE("CactusSnarlFinder can differentiate ultrabubbles from snarls", "[geno
         )";
     
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         CactusSnarlFinder cubs(graph);
@@ -1454,10 +1412,8 @@ TEST_CASE("IntegratedSnarlFinder can differentiate ultrabubbles from snarls", "[
         )";
         
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         IntegratedSnarlFinder cubs(graph);
@@ -1504,10 +1460,8 @@ TEST_CASE("IntegratedSnarlFinder can differentiate ultrabubbles from snarls", "[
         )";
     
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         IntegratedSnarlFinder cubs(graph);
@@ -1581,11 +1535,9 @@ TEST_CASE("RepresentativeTraversalFinder finds traversals correctly", "[genotype
         }
         )";
     
-    // Make an actual graph
+    // Load the graph. It needs to be a VG because we will give it to a SupportAugmentedGraph later.
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
 
     // Find the snarls
     CactusSnarlFinder cubs(graph);
@@ -1713,11 +1665,9 @@ TEST_CASE("RepresentativeTraversalFinder finds traversals of simple inversions",
     }
     )";
 
-    // Make an actual graph
+    // Load the graph. It needs to be a VG because we will give it to a SupportAugmentedGraph later.
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
 
     // Find the snarls
     CactusSnarlFinder cubs(graph);
@@ -1774,11 +1724,11 @@ TEST_CASE("GBWTTraversalFinder finds traversals for GBWT threads", "[genotype][g
     string graph_json = R"({"node": [{"id": 1, "sequence": "CAAATAAGGCTT"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GGAAATTTTC"}, {"id": 4, "sequence": "C"}, {"id": 5, "sequence": "TGGAGTTCTATTATATTCC"}, {"id": 6, "sequence": "G"}, {"id": 7, "sequence": "A"}, {"id": 8, "sequence": "ACTCTCTGGTTCCTG"}, {"id": 9, "sequence": "A"}, {"id": 10, "sequence": "G"}, {"id": 11, "sequence": "TGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCA"}], "edge": [{"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 2, "to": 3}, {"from": 3, "to": 4}, {"from": 3, "to": 5}, {"from": 4, "to": 5}, {"from": 5, "to": 6}, {"from": 5, "to": 7}, {"from": 6, "to": 8}, {"from": 7, "to": 8}, {"from": 8, "to": 9}, {"from": 8, "to": 10}, {"from": 9, "to": 11}, {"from": 10, "to": 11}]})";
   
     // Load the JSON
-    vg::Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(vg::VG(proto_graph));
+    xg_index.from_path_handle_graph(graph);
     
     gbwt::Verbosity::set(gbwt::Verbosity::SILENT);
   
diff --git a/src/unittest/genotyper.cpp b/src/unittest/genotyper.cpp
index e2e9f7a142..4228b16ee3 100644
--- a/src/unittest/genotyper.cpp
+++ b/src/unittest/genotyper.cpp
@@ -7,6 +7,7 @@
 #include "../snarls.hpp"
 #include "../cactus_snarl_finder.hpp"
 #include "../traversal_finder.hpp"
+#include "../io/json2graph.hpp"
 
 namespace vg {
 namespace unittest {
@@ -41,15 +42,6 @@ TEST_CASE("traversals can be found from reads", "[genotyper]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
-        ],
-        "path": [
-            {"name": "hint", "mapping": [
-                {"position": {"node_id": 1}, "rank" : 1 },
-                {"position": {"node_id": 6}, "rank" : 2 },
-                {"position": {"node_id": 8}, "rank" : 3 },
-                {"position": {"node_id": 9}, "rank" : 4 }
-            ]}
         ]
     }
     
@@ -57,9 +49,7 @@ TEST_CASE("traversals can be found from reads", "[genotyper]") {
     
     // Make an actual graph
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
     
     // Find the snarls
     SnarlManager manager = CactusSnarlFinder(graph).find_snarls();
diff --git a/src/unittest/haplotypes.cpp b/src/unittest/haplotypes.cpp
index e441bbe197..9e4e04475b 100644
--- a/src/unittest/haplotypes.cpp
+++ b/src/unittest/haplotypes.cpp
@@ -4,8 +4,10 @@
 
 #include "catch.hpp"
 #include "haplotypes.hpp"
+#include "../io/json2graph.hpp"
 #include "xg.hpp"
 #include "vg.hpp"
+#include <bdsg/hash_graph.hpp>
 
 #include <numeric>
 
@@ -66,7 +68,7 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
   )";
   
   thread_t SNP_thread = {tm[1], tm[3], tm[4]};
-    
+
   string del_graph_json = R"(
   {"node":[
     {"id":1,"sequence":"AAA"},
@@ -89,22 +91,24 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
     ]}
   ]}
   )";
-  
+
   thread_t del_ref_thread = {tm[1], tm[2], tm[4]};
   thread_t del_thread = {tm[1], tm[4]};
-  
-  vg::Graph SNP_proto_graph;
-  json2pb(SNP_proto_graph, SNP_graph_json.c_str(), SNP_graph_json.size());
+
+  // Build the SNP graph
+  bdsg::HashGraph SNP_graph;
+  vg::io::json2graph(SNP_graph_json, &SNP_graph);
   // Build the xg index
   xg::XG SNP_xg_index;
-  SNP_xg_index.from_path_handle_graph(vg::VG(SNP_proto_graph));
+  SNP_xg_index.from_path_handle_graph(SNP_graph);
   vg::path_handle_t SNP_ref_path_handle = SNP_xg_index.get_path_handle("reference");
-  
-  vg::Graph del_proto_graph;
-  json2pb(del_proto_graph, del_graph_json.c_str(), del_graph_json.size());
+
+  // Build the del graph
+  bdsg::HashGraph del_graph;
+  vg::io::json2graph(del_graph_json, &del_graph);
   // Build the xg index
   xg::XG del_xg_index;
-  del_xg_index.from_path_handle_graph(vg::VG(del_proto_graph));
+  del_xg_index.from_path_handle_graph(del_graph);
   vg::path_handle_t del_ref_path_handle = del_xg_index.get_path_handle("reference");
   
   // NEGATIVE SNVs
@@ -159,18 +163,20 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
   
   thread_t double_thread = {tm[1], tm[2], tm[4]};
 
-  vg::Graph long_proto_graph;
-  json2pb(long_proto_graph, long_graph_json.c_str(), long_graph_json.size());
+  // Build the long graph
+  bdsg::HashGraph long_graph;
+  vg::io::json2graph(long_graph_json, &long_graph);
   // Build the xg index
   xg::XG long_xg_index;
-  long_xg_index.from_path_handle_graph(vg::VG(long_proto_graph));
+  long_xg_index.from_path_handle_graph(long_graph);
   vg::path_handle_t long_ref_path_handle = long_xg_index.get_path_handle("reference");
-  
-  vg::Graph double_proto_graph;
-  json2pb(double_proto_graph, double_graph_json.c_str(), double_graph_json.size());
+
+  // Build the double graph
+  bdsg::HashGraph double_graph;
+  vg::io::json2graph(double_graph_json, &double_graph);
   // Build the xg index
   xg::XG double_xg_index;
-  double_xg_index.from_path_handle_graph(vg::VG(double_proto_graph));
+  double_xg_index.from_path_handle_graph(double_graph);
   vg::path_handle_t double_ref_path_handle = double_xg_index.get_path_handle("reference");
 
   string matching_test_file = "matching_test.slls";
@@ -382,13 +388,13 @@ TEST_CASE("We can score haplotypes using GBWT", "[haplo-score][gbwt]") {
 TEST_CASE("We can recognize a required crossover", "[hapo-score][gbwt]") {
   // This graph is the start of xy2 from test/small
   string graph_json = R"({"node": [{"id": 1, "sequence": "CAAATAAGGCTT"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GGAAATTTTC"}, {"id": 4, "sequence": "C"}, {"id": 5, "sequence": "TGGAGTTCTATTATATTCC"}, {"id": 6, "sequence": "G"}, {"id": 7, "sequence": "A"}, {"id": 8, "sequence": "ACTCTCTGGTTCCTG"}, {"id": 9, "sequence": "A"}, {"id": 10, "sequence": "G"}, {"id": 11, "sequence": "TGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCA"}], "edge": [{"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 2, "to": 3}, {"from": 3, "to": 4}, {"from": 3, "to": 5}, {"from": 4, "to": 5}, {"from": 5, "to": 6}, {"from": 5, "to": 7}, {"from": 6, "to": 8}, {"from": 7, "to": 8}, {"from": 8, "to": 9}, {"from": 8, "to": 10}, {"from": 9, "to": 11}, {"from": 10, "to": 11}]})";
-  
-  // Load the JSON
-  vg::Graph proto_graph;
-  json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+
+  // Load the JSON into a HashGraph
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
   // Build the xg index
   xg::XG xg_index;
-  xg_index.from_path_handle_graph(vg::VG(proto_graph));
+  xg_index.from_path_handle_graph(graph);
     
   gbwt::Verbosity::set(gbwt::Verbosity::SILENT);
   gbwt::DynamicGBWT* gbwt_index = new gbwt::DynamicGBWT;
diff --git a/src/unittest/indexed_vg.cpp b/src/unittest/indexed_vg.cpp
index 7f74d92193..27504dea9f 100644
--- a/src/unittest/indexed_vg.cpp
+++ b/src/unittest/indexed_vg.cpp
@@ -40,7 +40,7 @@ TEST_CASE("An IndexedVG can be created for a single node", "[handle][indexed-vg]
         ]
     })";
     
-    // Load the JSON
+    // Load the JSON to Protobuf specifically.
     Graph proto_graph;
     json2pb(proto_graph, graph_json.c_str(), graph_json.size());
     
diff --git a/src/unittest/mapper.cpp b/src/unittest/mapper.cpp
index 2caf42d076..17f81fe17b 100644
--- a/src/unittest/mapper.cpp
+++ b/src/unittest/mapper.cpp
@@ -1,9 +1,10 @@
 /// \file mapper.cpp
-///  
+///
 /// unit tests for the mapper
 
 #include <iostream>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <vg/vg.pb.h>
 #include <bdsg/hash_graph.hpp>
 #include "../mapper.hpp"
@@ -25,14 +26,10 @@ TEST_CASE( "Mapper can map to a one-node graph", "[mapping][mapper]" ) {
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -245,14 +242,10 @@ TEST_CASE( "Mapper finds optimal mapping for read starting with node-border MEM"
     {"position":{"node_id":1444},"rank":1059},
     {"position":{"node_id":1445},"rank":1060}]}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -311,14 +304,10 @@ TEST_CASE( "Mapper can annotate positions correctly on both strands", "[mapper][
         ]}
     ]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
diff --git a/src/unittest/minimizer_mapper.cpp b/src/unittest/minimizer_mapper.cpp
index 3ecd5de147..84628276e4 100644
--- a/src/unittest/minimizer_mapper.cpp
+++ b/src/unittest/minimizer_mapper.cpp
@@ -3,8 +3,8 @@
 /// unit tests for the minimizer mapper
 
 #include <iostream>
-#include "vg/io/json2pb.h"
 #include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "../minimizer_mapper.hpp"
 #include "../build_index.hpp"
@@ -450,15 +450,13 @@ TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraff
                 {"id": "55511925", "sequence": "CTTCCTTCC"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("");
-        
+
         pos_t left_anchor {55511921, false, 5}; // This is on the final base of the node
         pos_t right_anchor {55511925, false, 6};
         
@@ -480,7 +478,7 @@ TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraff
 TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -492,12 +490,10 @@ TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
         
@@ -527,7 +523,7 @@ TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping
 TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -539,12 +535,10 @@ TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
         
@@ -574,7 +568,7 @@ TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node
 TEST_CASE("MinimizerMapper can map right off the past-the-end base", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -586,15 +580,13 @@ TEST_CASE("MinimizerMapper can map right off the past-the-end base", "[giraffe][
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
-        
+
         pos_t left_anchor {1, false, 1}; // This is the past-end position
         pos_t right_anchor = empty_pos_t();
         
@@ -635,15 +627,13 @@ TEST_CASE("MinimizerMapper can compute longest detectable gap in range", "[giraf
 TEST_CASE("MinimizerMapper can find a significant indel instead of a tempting softclip", "[giraffe][mapping][left_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [{"from": "30788083", "to": "30788088"}, {"from": "30788083", "to": "30788084"}, {"from": "30788074", "to": "30788075"}, {"from": "30788074", "to": "30788076"}, {"from": "30788079", "to": "30788080"}, {"from": "30788079", "to": "30788081"}, {"from": "30788086", "to": "30788088"}, {"from": "30788086", "to": "30788087", "to_end": true}, {"from": "30788075", "to": "30788077"}, {"from": "30788073", "to": "30788074"}, {"from": "30788078", "to": "30788079"}, {"from": "30788077", "to": "30788078"}, {"from": "30788084", "to": "30788088"}, {"from": "30788084", "to": "30788085"}, {"from": "30788076", "to": "30788077"}, {"from": "30788087", "from_start": true, "to": "30788088"}, {"from": "30788081", "to": "30788082"}, {"from": "30788080", "to": "30788082"}, {"from": "30788082", "to": "30788088"}, {"from": "30788082", "to": "30788083"}, {"from": "30788085", "to": "30788086"}], "node": [{"id": "30788083", "sequence": "AAA"}, {"id": "30788074", "sequence": "AAAAAAAATACAAAAAATTAGC"}, {"id": "30788079", "sequence": "CGCCACTGCACTCCAGCCTGGGC"}, {"id": "30788086", "sequence": "AAAAAAA"}, {"id": "30788075", "sequence": "T"}, {"id": "30788073", "sequence": "GAAAGAGAGTTGTTTAAATTCCATAGTTAGGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTA"}, {"id": "30788078", "sequence": "G"}, {"id": "30788077", "sequence": "GGGCGTGGTAGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATC"}, {"id": "30788084", "sequence": "A"}, {"id": "30788088", "sequence": "AATTCCATAGTTAGAAAAATAAGACATATCAGGTTTTCAAAAAGTGTAGCCATTTTCTGTTTCTAAAAGGGACACTTAAAGTGAAA"}, {"id": "30788076", "sequence": "C"}, {"id": "30788087", "sequence": "T"}, {"id": "30788081", "sequence": "A"}, {"id": "30788080", "sequence": "G"}, {"id": "30788082", "sequence": "ACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAA"}, {"id": "30788085", "sequence": "AA"}]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
         
         Alignment aln;
         aln.set_sequence("TTGAAAACCTGATATGTCTTATTTTTCTAACTATGGAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCTACCACGCCCGGCTAATTTTTTGTATTTTTTTT");
@@ -854,9 +844,8 @@ TEST_CASE("MinimizerMapper can extract a strand-split dagified local graph witho
                      {"id": "60245278", "sequence": "GATTACAGATTACA"}]
         }
     )";
-    vg::Graph graph_chunk;
-    json2pb(graph_chunk, graph_json.c_str(), graph_json.size());
-    vg::VG graph(graph_chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     TestMinimizerMapper::with_dagified_local_graph(make_pos_t(60245283, false, 10), empty_pos_t(), 50, graph, [&](DeletableHandleGraph& dagified_graph, const handle_t& left_anchor_handle, const handle_t& right_anchor_handle, const std::function<std::pair<nid_t, bool>(const handle_t&)>& dagified_handle_to_base) {
         // The graph started as a stick
diff --git a/src/unittest/multipath_alignment_graph.cpp b/src/unittest/multipath_alignment_graph.cpp
index bea5f687aa..d78e19d6f1 100644
--- a/src/unittest/multipath_alignment_graph.cpp
+++ b/src/unittest/multipath_alignment_graph.cpp
@@ -3,7 +3,8 @@
 /// unit tests for the multipath mapper's MultipathAlignmentGraph
 
 #include <iostream>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "../cactus_snarl_finder.hpp"
 #include "../integrated_snarl_finder.hpp"
@@ -47,13 +48,9 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG vg;
-    vg.extend(proto_graph);
-    
+    bdsg::HashGraph vg;
+    ::vg::io::json2graph(graph_json, &vg);
+
     // Make snarls on it
     CactusSnarlFinder bubble_finder(vg);
     IntegratedSnarlFinder snarl_finder(vg);
diff --git a/src/unittest/multipath_mapper.cpp b/src/unittest/multipath_mapper.cpp
index be6d3b6194..bc1dc4cdd9 100644
--- a/src/unittest/multipath_mapper.cpp
+++ b/src/unittest/multipath_mapper.cpp
@@ -4,7 +4,9 @@
 
 #include <iostream>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <vg/vg.pb.h>
+#include <bdsg/hash_graph.hpp>
 #include "../multipath_mapper.hpp"
 #include "../build_index.hpp"
 #include "xg.hpp"
@@ -111,7 +113,7 @@ TEST_CASE( "MultipathMapper::read_coverage works", "[multipath][mapping][multipa
 }
 
 TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GATTACA"}],
         "path": [
@@ -120,14 +122,10 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
             ]}
         ]
     })";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -135,17 +133,17 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
     // Make pointers to fill in
     gcsa::GCSA* gcsaidx = nullptr;
     gcsa::LCPArray* lcpidx = nullptr;
-    
+
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
-    
+    xg_index.from_path_handle_graph(graph);
+
     // Make a multipath mapper to map against the graph.
     TestMultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
-    
+
     // Make an Alignment that we're pretending we're doing
     Alignment aln;
     aln.set_sequence("GATTACA");
@@ -264,7 +262,7 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
 }
     
 TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GATTACA"}],
         "path": [
@@ -273,14 +271,10 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
             ]}
         ]
     })";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -291,11 +285,11 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
     
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
-    
+
     // Make a multipath mapper to map against the graph.
     MultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
     // Lower the max mapping quality so that it thinks it can find unambiguous mappings of
@@ -422,16 +416,12 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
 }
 
 TEST_CASE( "MultipathMapper can work on a bigger graph", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({"node":[{"sequence":"CTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATG","id":12},{"sequence":"A","id":2},{"sequence":"CAAATAAGGCTTGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTT","id":3},{"sequence":"TTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTT","id":4},{"sequence":"CAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTA","id":5},{"sequence":"CTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTT","id":6},{"sequence":"ACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAAC","id":7},{"sequence":"ACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTA","id":8},{"sequence":"AGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAA","id":9},{"sequence":"CAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATC","id":10},{"sequence":"TTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAG","id":11}],"path":[{"name":"x","mapping":[{"position":{"node_id":3},"edit":[{"from_length":100,"to_length":100}],"rank":1},{"position":{"node_id":4},"edit":[{"from_length":100,"to_length":100}],"rank":2},{"position":{"node_id":5},"edit":[{"from_length":100,"to_length":100}],"rank":3},{"position":{"node_id":6},"edit":[{"from_length":100,"to_length":100}],"rank":4},{"position":{"node_id":7},"edit":[{"from_length":100,"to_length":100}],"rank":5},{"position":{"node_id":8},"edit":[{"from_length":100,"to_length":100}],"rank":6},{"position":{"node_id":9},"edit":[{"from_length":100,"to_length":100}],"rank":7},{"position":{"node_id":10},"edit":[{"from_length":100,"to_length":100}],"rank":8},{"position":{"node_id":11
},"edit":[{"from_length":100,"to_length":100}],"rank":9},{"position":{"node_id":12},"edit":[{"from_length":100,"to_length":100}],"rank":10},{"position":{"node_id":2},"edit":[{"from_length":1,"to_length":1}],"rank":11}]}],"edge":[{"from":12,"to":2},{"from":3,"to":4},{"from":4,"to":5},{"from":5,"to":6},{"from":6,"to":7},{"from":7,"to":8},{"from":8,"to":9},{"from":9,"to":10},{"from":10,"to":11},{"from":11,"to":12}]})";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -442,11 +432,11 @@ TEST_CASE( "MultipathMapper can work on a bigger graph", "[multipath][mapping][m
     
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
-    
+    xg_index.from_path_handle_graph(graph);
+
     // Make a multipath mapper to map against the graph.
     TestMultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
     // Lower the max mapping quality so that it thinks it can find unambiguous mappings of
diff --git a/src/unittest/packed_structs.cpp b/src/unittest/packed_structs.cpp
index 9c0075751e..512e638620 100644
--- a/src/unittest/packed_structs.cpp
+++ b/src/unittest/packed_structs.cpp
@@ -69,7 +69,7 @@ using namespace std;
                     case APPEND:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_vec.push_back(next_val);
-                            dyn_vec.append(next_val);
+                            dyn_vec.push_back(next_val);
                             next_val++;
                         }
                         
@@ -79,7 +79,7 @@ using namespace std;
                         if (!std_vec.empty()) {
                             for (size_t k = 0; k < pops_per_op; k++) {
                                 std_vec.pop_back();
-                                dyn_vec.pop();
+                                dyn_vec.pop_back();
                             }
                         }
                         
@@ -161,7 +161,7 @@ using namespace std;
                     case APPEND:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_vec.push_back(next_val);
-                            dyn_vec.append(next_val);
+                            dyn_vec.push_back(next_val);
                             next_val = val_distr(prng);
                         }
                         
@@ -171,7 +171,7 @@ using namespace std;
                         if (!std_vec.empty()) {
                             for (size_t k = 0; k < pops_per_op; k++) {
                                 std_vec.pop_back();
-                                dyn_vec.pop();
+                                dyn_vec.pop_back();
                             }
                         }
                         
@@ -252,7 +252,7 @@ using namespace std;
                     case APPEND_LEFT:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_deq.push_front(next_val);
-                            suc_deq.append_front(next_val);
+                            suc_deq.push_front(next_val);
                             next_val++;
                         }
                         
@@ -269,7 +269,7 @@ using namespace std;
                     case APPEND_RIGHT:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_deq.push_back(next_val);
-                            suc_deq.append_back(next_val);
+                            suc_deq.push_back(next_val);
                             next_val++;
                         }
                         
diff --git a/src/unittest/path_component_index.cpp b/src/unittest/path_component_index.cpp
index 058f4bf9c1..edd3a6013a 100644
--- a/src/unittest/path_component_index.cpp
+++ b/src/unittest/path_component_index.cpp
@@ -8,7 +8,8 @@
 #include "path_component_index.hpp"
 #include "xg.hpp"
 #include "vg.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 
 namespace vg {
@@ -17,14 +18,14 @@ namespace unittest {
     TEST_CASE("Path component memoization produces expected results", "[pathcomponent]") {
         
         string graph_json = R"({"node": [{"sequence": "AAACCC", "id": 1}, {"sequence": "CACACA", "id": 2}, {"sequence": "CACACA", "id": 3}, {"sequence": "TTTTGG", "id": 4}, {"sequence": "ACGTAC", "id": 5}], "path": [{"name": "one", "mapping": [{"position": {"node_id": 1}, "rank": 1}, {"position": {"node_id": 2}, "rank": 2}]}, {"name": "three", "mapping": [{"position": {"node_id": 2}, "rank": 1}, {"position": {"node_id": 3}, "rank": 2}]}, {"name": "two", "mapping": [{"position": {"node_id": 4}, "rank": 1}, {"position": {"node_id": 5}, "rank": 2}]}], "edge": [{"from": 1, "to": 2}, {"from": 2, "to": 3}, {"from": 4, "to": 5}]})";
-        
+
         // Load the JSON
-        Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         // Build the xg index
         xg::XG xg_index;
-        xg_index.from_path_handle_graph(VG(proto_graph));
+        xg_index.from_path_handle_graph(graph);
         
         
         unordered_set<path_handle_t> comp_1;
diff --git a/src/unittest/path_index.cpp b/src/unittest/path_index.cpp
index b70152ae2d..e1facc2977 100644
--- a/src/unittest/path_index.cpp
+++ b/src/unittest/path_index.cpp
@@ -5,9 +5,9 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
 #include "../path_index.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 
 namespace vg {
@@ -58,15 +58,11 @@ const string path_index_graph_1 = R"(
 
 
 TEST_CASE("PathIndex can be created", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -78,13 +74,9 @@ TEST_CASE("PathIndex can be created", "[pathindex]") {
 TEST_CASE("PathIndex translation can change a node ID", "[pathindex]") {
 
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -115,15 +107,11 @@ TEST_CASE("PathIndex translation can change a node ID", "[pathindex]") {
 }
 
 TEST_CASE("PathIndex translation can divide a node", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -174,15 +162,11 @@ TEST_CASE("PathIndex translation can divide a node", "[pathindex]") {
 }
 
 TEST_CASE("PathIndex translation can create reverse strand mappings", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -235,15 +219,11 @@ TEST_CASE("PathIndex translation can create reverse strand mappings", "[pathinde
 }
 
 TEST_CASE("PathIndex translation can handle translations articulated for the reverse strand", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -300,15 +280,11 @@ TEST_CASE("PathIndex translation can handle translations articulated for the rev
 }
 
 TEST_CASE("PathIndex translation can divide the last node", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
diff --git a/src/unittest/phase_unfolder.cpp b/src/unittest/phase_unfolder.cpp
index 0c79972941..36cfbca9de 100644
--- a/src/unittest/phase_unfolder.cpp
+++ b/src/unittest/phase_unfolder.cpp
@@ -12,7 +12,8 @@
 #include <gbwt/dynamic_gbwt.h>
 
 #include "../phase_unfolder.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "xg.hpp"
 
 #include "catch.hpp"
@@ -210,10 +211,10 @@ const std::string unfolder_graph_path = R"(
 TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build an empty GBWT index.
     gbwt::GBWT gbwt_index;
@@ -224,9 +225,7 @@ TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -255,10 +254,10 @@ TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build an empty GBWT index.
     gbwt::GBWT gbwt_index;
@@ -269,9 +268,7 @@ TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -299,10 +296,10 @@ TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index without a path.
-    Graph graph_without_path;
-    json2pb(graph_without_path, unfolder_graph.c_str(), unfolder_graph.size());
+    bdsg::HashGraph graph_without_path;
+    vg::io::json2graph(unfolder_graph, &graph_without_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_without_path));
+    xg_index.from_path_handle_graph(graph_without_path);
 
     // Build a GBWT with three threads including a duplicate. We want to have
     // only one instance of short_path unfolded, but we want separate copies
@@ -335,9 +332,7 @@ TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]")
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -366,10 +361,10 @@ TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]")
 TEST_CASE("PhaseUnfolder can unfold both XG paths and GBWT threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build a GBWT with three threads including a duplicate. We want to have
     // only one instance of short_path unfolded, but we want separate copies
@@ -402,9 +397,7 @@ TEST_CASE("PhaseUnfolder can unfold both XG paths and GBWT threads", "[phaseunfo
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -501,10 +494,10 @@ const std::string unfolder_graph_simple_path = R"(
 TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolder][indexing]") {
 
     // Build an XG index.
-    Graph simple_graph;
-    json2pb(simple_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
+    bdsg::HashGraph simple_graph;
+    vg::io::json2graph(unfolder_graph_simple, &simple_graph);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(simple_graph));
+    xg_index.from_path_handle_graph(simple_graph);
 
     // Build a GBWT with both possible threads.
     gbwt::vector_type upper_path {
@@ -536,9 +529,7 @@ TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolde
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph_simple, &vg_graph);
 
     // Remove the bubble, including its endpoints.
     std::set<vg::id_t> to_remove { 3, 4, 5, 6 };
@@ -566,10 +557,10 @@ TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolde
 TEST_CASE("PhaseUnfolder can extend short threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index.
-    Graph simple_graph_with_path;
-    json2pb(simple_graph_with_path, unfolder_graph_simple_path.c_str(), unfolder_graph_simple_path.size());
+    bdsg::HashGraph simple_graph_with_path;
+    vg::io::json2graph(unfolder_graph_simple_path, &simple_graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(simple_graph_with_path));
+    xg_index.from_path_handle_graph(simple_graph_with_path);
 
     // Build a GBWT for the fragment that is different from the reference.
     gbwt::vector_type short_fragment {
@@ -586,9 +577,7 @@ TEST_CASE("PhaseUnfolder can extend short threads", "[phaseunfolder][indexing]")
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph_simple, &vg_graph);
 
     // Remove the bubble, including its endpoints.
     std::set<vg::id_t> to_remove { 3, 4, 5, 6 };
diff --git a/src/unittest/randomly_flipped_nodes.cpp b/src/unittest/randomly_flipped_nodes.cpp
new file mode 100644
index 0000000000..455bdd18ae
--- /dev/null
+++ b/src/unittest/randomly_flipped_nodes.cpp
@@ -0,0 +1,179 @@
+#include "catch.hpp"
+#include "../handle.hpp"
+#include "../utility.hpp"
+#include <bdsg/hash_graph.hpp>
+
+#include "support/randomly_flipped_nodes.hpp"
+#include "support/randomness.hpp"
+#include "support/random_graph.hpp"
+
+#include <set>
+#include <random>
+
+namespace vg {
+namespace unittest {
+
+/// Summarize a graph as the set of sequence pairs joined by its edges.
+///
+/// Every edge contributes the pair (left sequence, right sequence), with both
+/// sequences read in the orientation in which the edge is traversed. The same
+/// edge seen from the other strand would read (RC(right), RC(left)), so of
+/// those two spellings only the lexicographically smaller one is kept. That
+/// makes the result independent of which strand each node is stored on.
+///
+/// Because the result is a set keyed on sequence alone, edges between nodes
+/// with identical sequences collapse together. Matching sets therefore do not
+/// prove two graphs are the same, but a mismatch does show that they differ.
+///
+/// @param graph the graph whose edges are summarized
+/// @return the canonical (left, right) sequence pair of every edge
+static set<pair<string, string>> canonical_edge_pairs(const HandleGraph& graph) {
+    set<pair<string, string>> canonical_pairs;
+    graph.for_each_edge([&](const edge_t& e) {
+        // The edge as spelled when walking it first -> second.
+        pair<string, string> as_traversed(graph.get_sequence(e.first),
+                                          graph.get_sequence(e.second));
+
+        // The same edge as spelled when walking it on the opposite strand.
+        pair<string, string> opposite_strand(reverse_complement(as_traversed.second),
+                                             reverse_complement(as_traversed.first));
+
+        // Keep whichever spelling sorts first; ties keep the traversed one.
+        canonical_pairs.insert(opposite_strand < as_traversed ? opposite_strand : as_traversed);
+        return true;
+    });
+    return canonical_pairs;
+}
+
+/// Assert that `observed` is not obviously a different graph from `expected`.
+/// Node counts and edge counts must agree, and the canonical edge pairs of
+/// `observed` must equal `expected_edges`, which the caller supplies.
+static void validate_graph(const HandleGraph& observed, const HandleGraph& expected, const set<pair<string, string>>& expected_edges) {
+    // Size checks first, then the sequence-level comparison.
+    REQUIRE(expected.get_node_count() == observed.get_node_count());
+    REQUIRE(expected.get_edge_count() == observed.get_edge_count());
+    REQUIRE(expected_edges == canonical_edge_pairs(observed));
+}
+
+TEST_CASE("randomly_flipped_nodes preserves graph structure on a simple linear graph", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;  // A stick: one sequence chopped into nodes of up to node_length bases.
+    std::string stick_sequence = "GGACTGACTCGCATGTCGAGCGACTCGCGCGAGCTATCGTAGTACGCGAGTCATATTATATTATCACG";
+    size_t node_length = 3;
+    handle_t prev_handle;  // Only read once i > 0, by which point it has been assigned.
+    for (size_t i = 0; i < stick_sequence.size(); i += node_length) {
+        handle_t h = graph.create_handle(stick_sequence.substr(i, node_length));
+        if (i > 0) { graph.create_edge(prev_handle, h); }
+        prev_handle = h;
+    }
+    auto original_edges = canonical_edge_pairs(graph);
+
+    SECTION("flipping no nodes preserves edges exactly") {
+        default_random_engine gen(test_seed_source());
+        auto flipped = randomly_flipped_nodes(graph, 0.0, gen);
+        validate_graph(flipped, graph, original_edges);
+        graph.for_each_handle([&](const handle_t& h) {  // Nothing flipped: forward sequences must match node for node.
+            REQUIRE(flipped.get_sequence(flipped.get_handle(graph.get_id(h))) == graph.get_sequence(h));
+        });
+    }
+
+    SECTION("flipping all nodes preserves canonical edge pairs") {
+        default_random_engine gen(test_seed_source());
+        auto flipped = randomly_flipped_nodes(graph, 1.0, gen);
+        validate_graph(flipped, graph, original_edges);
+    }
+
+    SECTION("flipping 50% of nodes preserves canonical edge pairs") {
+        default_random_engine gen(test_seed_source());
+        auto flipped = randomly_flipped_nodes(graph, 0.5, gen);
+        validate_graph(flipped, graph, original_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on graph with reversing edges", "[randomly_flipped_nodes]") {
+    // Four nodes with explicit IDs 1-4.
+    bdsg::HashGraph graph;
+    const handle_t node1 = graph.create_handle("GATT", 1);
+    const handle_t node2 = graph.create_handle("ACA", 2);
+    const handle_t node3 = graph.create_handle("CGAT", 3);
+    const handle_t node4 = graph.create_handle("TCGAA", 4);
+
+    // A forward chain 1 -> 2 -> 3 -> 4 ...
+    graph.create_edge(node1, node2);
+    graph.create_edge(node2, node3);
+    graph.create_edge(node3, node4);
+    // ... plus an edge leaving 4 forward and entering 3 in reverse.
+    graph.create_edge(node4, graph.flip(node3));
+
+    const auto reference_edges = canonical_edge_pairs(graph);
+    // The generator is constructed once and reused across all rounds.
+    default_random_engine gen(test_seed_source());
+    for (int round = 0; round != 10; ++round) {
+        validate_graph(randomly_flipped_nodes(graph, 0.5, gen), graph, reference_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on graph with self-loops", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    const handle_t looper = graph.create_handle("ACGT", 1);
+    const handle_t inverter = graph.create_handle("TTCC", 2);
+
+    // Ordinary edge between the two nodes.
+    graph.create_edge(looper, inverter);
+    // Node 1 forward back onto node 1 forward.
+    graph.create_edge(looper, looper);
+    // Node 2 forward onto node 2 reverse.
+    graph.create_edge(inverter, graph.flip(inverter));
+
+    const auto reference_edges = canonical_edge_pairs(graph);
+    // The generator is constructed once and reused across all rounds.
+    default_random_engine gen(test_seed_source());
+    for (int round = 0; round != 10; ++round) {
+        validate_graph(randomly_flipped_nodes(graph, 0.5, gen), graph, reference_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on random graphs", "[randomly_flipped_nodes]") {
+    // Five graphs from random_graph, each put through five rounds of flipping.
+    for (int trial = 5; trial > 0; --trial) {
+        bdsg::HashGraph graph;
+        random_graph(100, 10, 10, &graph);
+        const auto reference_edges = canonical_edge_pairs(graph);
+
+        // A new generator is constructed for every graph and reused across its rounds.
+        default_random_engine gen(test_seed_source());
+        for (int round = 0; round != 5; ++round) {
+            validate_graph(randomly_flipped_nodes(graph, 0.5, gen), graph, reference_edges);
+        }
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves node IDs", "[randomly_flipped_nodes]") {
+    // Three chained nodes whose IDs are sparse (5, 10, 15) rather than 1..3.
+    bdsg::HashGraph graph;
+    const handle_t first = graph.create_handle("AAA", 5);
+    const handle_t middle = graph.create_handle("CCC", 10);
+    const handle_t last = graph.create_handle("GGG", 15);
+    graph.create_edge(first, middle);
+    graph.create_edge(middle, last);
+    default_random_engine gen(test_seed_source());
+    auto flipped = randomly_flipped_nodes(graph, 0.5, gen);
+    // Each original ID must still name a node in the result.
+    REQUIRE(flipped.has_node(5));
+    REQUIRE(flipped.has_node(10));
+    REQUIRE(flipped.has_node(15));
+}
+
+TEST_CASE("randomly_flipped_nodes actually flips node sequences", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    graph.create_handle("AAAC", 1);  // RC = GTTT; the returned handle is not needed
+
+    default_random_engine gen(test_seed_source());
+    // A flip probability of 1.0 is meant to guarantee the node is flipped
+    auto flipped = randomly_flipped_nodes(graph, 1.0, gen);
+
+    // The forward sequence should be the RC of the original
+    REQUIRE(flipped.get_sequence(flipped.get_handle(1)) == "GTTT");
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/readfilter.cpp b/src/unittest/readfilter.cpp
index cc1562f3f3..6d84fa0a38 100644
--- a/src/unittest/readfilter.cpp
+++ b/src/unittest/readfilter.cpp
@@ -5,6 +5,9 @@
 #include "catch.hpp"
 #include "readfilter.hpp"
 #include "xg.hpp"
+#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace vg {
 namespace unittest {
@@ -44,13 +47,13 @@ TEST_CASE("reads with ambiguous ends can be trimmed", "[filter]") {
     
     )";
     
-    // Load it into Protobuf
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    
+    // Load the graph
+    bdsg::HashGraph chunk;
+    vg::io::json2graph(graph_json, &chunk);
+
     // Pass it over to XG
     xg::XG index;
-    index.from_path_handle_graph(VG(chunk));
+    index.from_path_handle_graph(chunk);
     
     // Make a ReadFilter;
     ReadFilter<Alignment> filter;
diff --git a/src/unittest/sampler.cpp b/src/unittest/sampler.cpp
index d8bb95b650..cda0147f57 100644
--- a/src/unittest/sampler.cpp
+++ b/src/unittest/sampler.cpp
@@ -6,11 +6,10 @@
 #include <unordered_set>
 #include <utility>
 
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "../sampler.hpp"
 #include "../xg.hpp"
-#include "../vg.hpp"
 #include "catch.hpp"
 
 namespace vg {
@@ -28,13 +27,9 @@ TEST_CASE( "Sampler can sample from a 1-node graph", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -118,13 +113,9 @@ TEST_CASE( "position_at works", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -195,13 +186,9 @@ TEST_CASE( "Sampler can sample from a loop-containing path", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -259,13 +246,9 @@ TEST_CASE( "Sampler can across reversing edges", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
diff --git a/src/unittest/snarl_decomposition_fuzzer.cpp b/src/unittest/snarl_decomposition_fuzzer.cpp
new file mode 100644
index 0000000000..38742be20c
--- /dev/null
+++ b/src/unittest/snarl_decomposition_fuzzer.cpp
@@ -0,0 +1,339 @@
+#include "catch.hpp"
+#include "../handle.hpp"
+#include <bdsg/hash_graph.hpp>
+
+#include "support/snarl_decomposition_fuzzer.hpp"
+
+#include <vector>
+#include <unordered_set>
+
+namespace vg {
+namespace unittest {
+
+using ET = DecompositionEventType;
+using Event = DecompositionEvent;
+
+TEST_CASE("ReplaySnarlFinder replays events faithfully", "[snarl_decomposition_fuzzer]") {
+    // Round-trip check: create nodes 1-5 so the scripted events refer to real handles
+    bdsg::HashGraph graph;
+    graph.create_handle("A", 1);
+    graph.create_handle("C", 2);
+    graph.create_handle("G", 3);
+    graph.create_handle("T", 4);
+    graph.create_handle("AA", 5);
+
+    std::vector<Event> events = {  // indentation mirrors the chain/snarl nesting
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder finder(&graph, events);
+    std::vector<Event> captured = capture_events(finder, graph);
+
+    REQUIRE(captured == events);  // the replay must reproduce the script exactly
+}
+
+TEST_CASE("SnarlDecompositionFuzzer passes through when nothing is flipped", "[snarl_decomposition_fuzzer]") {
+    bdsg::HashGraph graph;  // nodes only; the decomposition below is scripted, not computed
+    graph.create_handle("A", 1);
+    graph.create_handle("C", 2);
+    graph.create_handle("G", 3);
+    graph.create_handle("T", 4);
+    graph.create_handle("AA", 5);
+
+    std::vector<Event> events = {  // indentation mirrors the chain/snarl nesting
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder replay(&graph, events);
+
+    // Empty flip set: no chain is selected for flipping
+    SnarlDecompositionFuzzer fuzzer(&graph, &replay, {});
+
+    std::vector<Event> captured = capture_events(fuzzer, graph);
+
+    REQUIRE(captured == events);  // with nothing flipped the fuzzer must be transparent
+}
+
+TEST_CASE("SnarlDecompositionFuzzer flips an outer chain", "[snarl_decomposition_fuzzer]") {
+    // Scripted decomposition (the graph has nodes but no edges are created):
+    // Chain: 1fwd -> snarl(1fwd, 4fwd) -> snarl(4fwd, 5fwd) -> 5fwd
+    // Inside first snarl: chain 2rev->3rev
+    bdsg::HashGraph graph;
+    graph.create_handle("A", 1);
+    graph.create_handle("C", 2);
+    graph.create_handle("G", 3);
+    graph.create_handle("T", 4);
+    graph.create_handle("AA", 5);
+    
+    std::vector<Event> events = {  // third field: true = reverse orientation
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder replay(&graph, events);
+
+    SECTION("flip outer chain only") {
+        // Select the outer chain (1fwd -> 5fwd) by listing its two boundary node IDs
+        std::unordered_set<nid_t> flips {1, 5};
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        // Expected after flipping the outer chain:
+        // Flipping a chain reverses everything inside it, including children,
+        // so the two snarls swap order and the nested chain 2rev->3rev is
+        // emitted as 3fwd->2fwd as part of the parent flip.
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 5, true},
+              {ET::BEGIN_SNARL, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 3, false},
+                {ET::END_CHAIN, 2, false},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+
+    SECTION("flip outer and nested chain") {
+        // Select both the outer chain (1fwd->5fwd) AND the nested chain (2rev->3rev)
+        std::unordered_set<nid_t> flips {1, 5, 2, 3};
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        // Expected: the outer flip reverses everything (turning the nested
+        // chain into 3fwd->2fwd), and the nested chain's own flip then
+        // reverses it again, restoring its original orientation 2rev->3rev.
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 5, true},
+              {ET::BEGIN_SNARL, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 2, true},
+                {ET::END_CHAIN, 3, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+
+    SECTION("flip nested chain only") {
+        // Select only the nested chain (2rev->3rev); the outer chain is not listed
+        std::unordered_set<nid_t> flips {2, 3};
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        // Outer chain and its snarls unchanged; nested chain becomes 3fwd->2fwd
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 1, false},
+              {ET::BEGIN_SNARL, 1, false},
+                {ET::BEGIN_CHAIN, 3, false},
+                {ET::END_CHAIN, 2, false},
+              {ET::END_SNARL, 4, false},
+              {ET::BEGIN_SNARL, 4, false},
+              {ET::END_SNARL, 5, false},
+            {ET::END_CHAIN, 5, false},
+        };
+
+        REQUIRE(captured == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles empty chain", "[snarl_decomposition_fuzzer]") {
+    bdsg::HashGraph graph;
+    graph.create_handle("ACGT", 1);
+
+    // A trivial chain: it begins and ends on the same handle, with no snarls inside
+    std::vector<Event> events = {
+        {ET::BEGIN_CHAIN, 1, false},
+        {ET::END_CHAIN, 1, false},
+    };
+
+    ReplaySnarlFinder replay(&graph, events);
+
+    SECTION("flipping an empty chain") {
+        std::unordered_set<nid_t> flips {1};  // the chain's only boundary node
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        std::vector<Event> expected = {  // same node, orientation reversed on both events
+            {ET::BEGIN_CHAIN, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles multiple top-level chains", "[snarl_decomposition_fuzzer]") {
+    bdsg::HashGraph graph;
+    graph.create_handle("A", 1);
+    graph.create_handle("C", 2);
+    graph.create_handle("G", 3);
+    graph.create_handle("T", 4);
+
+    // Two sibling top-level chains in the root snarl, scripted one after the other
+    std::vector<Event> events = {
+        // Chain 1: 1fwd -> snarl -> 2fwd
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+          {ET::END_SNARL, 2, false},
+        {ET::END_CHAIN, 2, false},
+        // Chain 2: 3fwd -> snarl -> 4fwd
+        {ET::BEGIN_CHAIN, 3, false},
+          {ET::BEGIN_SNARL, 3, false},
+          {ET::END_SNARL, 4, false},
+        {ET::END_CHAIN, 4, false},
+    };
+
+    ReplaySnarlFinder replay(&graph, events);
+
+    SECTION("flip only first chain") {
+        std::unordered_set<nid_t> flips {1, 2};  // boundary nodes of chain 1 only
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 2, true},  // chain 1 reversed: 2rev -> snarl -> 1rev
+              {ET::BEGIN_SNARL, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+            {ET::BEGIN_CHAIN, 3, false},  // chain 2 emitted exactly as scripted
+              {ET::BEGIN_SNARL, 3, false},
+              {ET::END_SNARL, 4, false},
+            {ET::END_CHAIN, 4, false},
+        };
+
+        REQUIRE(captured == expected);
+    }
+
+    SECTION("flip both chains") {
+        std::unordered_set<nid_t> flips {1, 2, 3, 4};  // boundary nodes of both chains
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        std::vector<Event> expected = {  // each chain reversed in place; chain order unchanged
+            {ET::BEGIN_CHAIN, 2, true},
+              {ET::BEGIN_SNARL, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+            {ET::BEGIN_CHAIN, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+              {ET::END_SNARL, 3, true},
+            {ET::END_CHAIN, 3, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles deeply nested chains", "[snarl_decomposition_fuzzer]") {
+    bdsg::HashGraph graph;
+    for (nid_t i = 1; i <= 8; i++) {  // ids 1..8; the events below only use 1-6
+        graph.create_handle("A", i);
+    }
+
+    // Outer chain: 1->6
+    //   Snarl(1,4)
+    //     Inner chain: 2->3
+    //       Snarl(2,3) [leaf snarl, no children]
+    //   Snarl(4,6)
+    //     Inner chain: 5->5 [empty/trivial]
+    std::vector<Event> events = {
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, false},
+              {ET::BEGIN_SNARL, 2, false},
+              {ET::END_SNARL, 3, false},
+            {ET::END_CHAIN, 3, false},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+            {ET::BEGIN_CHAIN, 5, false},
+            {ET::END_CHAIN, 5, false},
+          {ET::END_SNARL, 6, false},
+        {ET::END_CHAIN, 6, false},
+    };
+
+    ReplaySnarlFinder replay(&graph, events);
+
+    SECTION("flip outer chain only") {
+        std::unordered_set<nid_t> flips {1, 6};  // boundary nodes of the outer chain
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        // Both inner chains and every snarl are reversed along with the outer chain.
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 6, true},
+              {ET::BEGIN_SNARL, 6, true},
+                {ET::BEGIN_CHAIN, 5, true},
+                {ET::END_CHAIN, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 3, true},
+                  {ET::BEGIN_SNARL, 3, true},
+                  {ET::END_SNARL, 2, true},
+                {ET::END_CHAIN, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+
+    SECTION("flip outer and inner chain") {
+        std::unordered_set<nid_t> flips {1, 6, 2, 3};  // outer chain plus inner chain 2->3
+        SnarlDecompositionFuzzer fuzzer(&graph, &replay, flips);
+
+        std::vector<Event> captured = capture_events(fuzzer, graph);
+
+        // Outer chain flips; inner chain 2->3 flips back to forward; chain 5->5 stays reversed
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 6, true},
+              {ET::BEGIN_SNARL, 6, true},
+                {ET::BEGIN_CHAIN, 5, true},
+                {ET::END_CHAIN, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 2, false},
+                  {ET::BEGIN_SNARL, 2, false},
+                  {ET::END_SNARL, 3, false},
+                {ET::END_CHAIN, 3, false},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        REQUIRE(captured == expected);
+    }
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/snarl_distance_index.cpp b/src/unittest/snarl_distance_index.cpp
index 36a1b9b74e..d257e719eb 100644
--- a/src/unittest/snarl_distance_index.cpp
+++ b/src/unittest/snarl_distance_index.cpp
@@ -9,23 +9,34 @@
 #include <iostream>
 #include <sstream>
 #include <set>
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 #include "support/random_graph.hpp"
 #include "support/randomness.hpp"
+#include "support/randomly_flipped_nodes.hpp"
+#include "support/snarl_decomposition_fuzzer.hpp"
 #include "../snarl_distance_index.hpp"
 #include "../integrated_snarl_finder.hpp"
 #include "../genotypekit.hpp"
 #include "../traversal_finder.hpp"
+#include "../io/save_handle_graph.hpp"
 #include <vg/io/protobuf_emitter.hpp>
 #include <vg/io/vpkg.hpp>
 #include "xg.hpp"
+#include <handlegraph/algorithms/weakly_connected_components.hpp>
+#include <handlegraph/algorithms/find_shortest_paths.hpp>
 
 //#define debug
 
 namespace vg {
     namespace unittest {
+
+    // Declaring any operator<< in vg::unittest hides the overloads declared in
+    // the enclosing vg namespace from unqualified lookup, so re-expose them here.
+    using vg::operator<<;
+
+
     static pair<unordered_set<Node*>, unordered_set<Edge*> > pb_contents(
         VG& graph, const pair<unordered_set<id_t>, unordered_set<edge_t> >& contents) {
         pair<unordered_set<Node*>, unordered_set<Edge*> > ret;
@@ -192,7 +203,82 @@ namespace vg {
                 REQUIRE(distance_index.minimum_distance(2, true, 0, 2, true, 1) == 1);
             }
         }
-        TEST_CASE( "Nested chain with loop", "[snarl_distance]" ) {
+        TEST_CASE( "Can distance index nested chain without loop", "[snarl_distance]" ) {
+            bdsg::HashGraph graph;
+            handle_t h1 = graph.create_handle("G");
+            handle_t h2 = graph.create_handle("A");
+            handle_t h3 = graph.create_handle("T");
+            handle_t h4 = graph.create_handle("T");
+            handle_t h5 = graph.create_handle("A");
+            handle_t h6 = graph.create_handle("C");
+            handle_t h7 = graph.create_handle("A");
+            
+            // Wire the seven nodes into a linear path h1 -> h2 -> ... -> h7
+            graph.create_edge(h1, h2);
+            graph.create_edge(h2, h3);
+            graph.create_edge(h3, h4);
+            graph.create_edge(h4, h5);
+            graph.create_edge(h5, h6);
+            graph.create_edge(h6, h7);
+
+            // Add a bypass h2 -> h5 over h3 and h4, making a snarl with a child chain
+            graph.create_edge(h2, h5);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+
+            SECTION("Snarl classifications are correct") {  // both sections query h3's parent's parent
+                SECTION("Distance index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(graph.get_id(h3))))));
+                }
+                SECTION("Distanceless index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);  // NOTE(review): 4th arg 0 presumably means no distances are stored - confirm
+                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(graph.get_id(h3))))));
+                }
+            }
+        }
+        TEST_CASE( "Can distance index nested chain with a loop hiding in the middle", "[snarl_distance]" ) {
+            bdsg::HashGraph graph;
+            handle_t h1 = graph.create_handle("G");
+            handle_t h2 = graph.create_handle("A");
+            handle_t h3 = graph.create_handle("T");
+            handle_t h4 = graph.create_handle("T");
+            handle_t h5 = graph.create_handle("A");
+            handle_t h6 = graph.create_handle("C");
+            handle_t h7 = graph.create_handle("A");
+            
+            // Wire the seven nodes into a linear path h1 -> h2 -> ... -> h7
+            graph.create_edge(h1, h2);
+            graph.create_edge(h2, h3);
+            graph.create_edge(h3, h4);
+            graph.create_edge(h4, h5);
+            graph.create_edge(h5, h6);
+            graph.create_edge(h6, h7);
+
+            // Add a bypass h1 -> h6 over h2..h5, making a snarl whose child chain has a few nodes in it
+            graph.create_edge(h1, h6);
+
+            // Add a reversing self-edge (h3 -> flipped h3) so a traversal can turn around mid-chain
+            graph.create_edge(h3, graph.flip(h3));
+
+            IntegratedSnarlFinder snarl_finder(graph);
+
+            SECTION("Snarl classifications are correct") {  // the turnaround must make h3's parent's parent non-regular
+                SECTION("Distance index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(graph.get_id(h3))))));
+                }
+                SECTION("Distanceless index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);  // NOTE(review): 4th arg 0 presumably means no distances are stored - confirm
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(graph.get_id(h3))))));
+                }
+            }
+        }
+        TEST_CASE( "Can distance index nested chain with a loop", "[snarl_distance]" ) {
         
             VG graph;
                 
@@ -230,7 +316,8 @@ namespace vg {
             Edge* e17 = graph.create_edge(n11, n12);
             Edge* e18 = graph.create_edge(n12, n13);
             
-            graph.serialize_to_file("test_graph.vg");
+            vg::io::save_handle_graph(&graph, "test_graph.vg");
+            
             //get the snarls
             IntegratedSnarlFinder snarl_finder(graph); 
             SECTION("Traversal of chain") {
@@ -248,16 +335,13 @@ namespace vg {
                     fill_in_distance_index(&distance_index, &graph, &snarl_finder);
                     REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))));
                     REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id())))));
-                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), true));
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), false));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id())))));
                 } SECTION("Distanceless index") {
                     SnarlDistanceIndex distance_index;
                     fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))), true, &graph));
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id()))), true, &graph));
-                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), true, &graph));
-                    // TODO: This isn't true because it would be too much work to recursively check all children using only the graph
-                    //REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), false, &graph));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id())))));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id())))));
                 }
             }
             SECTION("Minimum distances are correct") {
@@ -3577,12 +3661,9 @@ namespace vg {
             //    }
             //    )";
             //    
-            //    VG graph;
-            //    
             //    // Load up the graph
-            //    Graph g;
-            //    json2pb(g, graph_json.c_str(), graph_json.size());
-            //    graph.extend(g);
+            //    VG graph;
+            //    vg::io::json2graph(graph_json, &graph);
             //    
             //    // Define the one snarl
             //    Snarl snarl1;
@@ -3709,12 +3790,9 @@ namespace vg {
             //    string snarl2_json = R"({"type": 1, "end": {"node_id": 187209, "backward": true}, "start": {"node_id": 178895, "backward": true}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
             //    string snarl3_json = R"({"type": 1, "end": {"node_id": 178896}, "start": {"node_id": 178895}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
             //    
-            //    VG graph;
-            //    
             //    // Load up the graph
-            //    Graph g;
-            //    json2pb(g, graph_json.c_str(), graph_json.size());
-            //    graph.extend(g);
+            //    VG graph;
+            //    vg::io::json2graph(graph_json, &graph);
             //    
             //    // Load the snarls
             //    Snarl snarl1, snarl2, snarl3;
@@ -3885,9 +3963,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             // We need to see the path.
             REQUIRE(graph.paths.size() == 1);
@@ -4145,9 +4221,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4258,9 +4332,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4407,9 +4479,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
          
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4536,9 +4606,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4645,9 +4713,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4749,9 +4815,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4919,9 +4983,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -5042,9 +5104,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -6624,6 +6684,25 @@ namespace vg {
             }
         }
 
+        TEST_CASE( "Tiny oversized snarl", "[snarl_distance]" ) {
+          VG graph;  // four nodes; the REQUIRE below assumes they receive ids 1..4 in creation order - TODO confirm
+          handle_t n1 = graph.create_handle("GCA");
+          handle_t n2 = graph.create_handle("T");
+          handle_t n3 = graph.create_handle("G");
+          handle_t n4 = graph.create_handle("CTGA");  
+
+          graph.create_edge(n1, n2);
+          graph.create_edge(n1, n3);
+          graph.create_edge(n2, n3); // the direct edge the distance query below relies on
+          graph.create_edge(n2, n4);  
+          graph.create_edge(n3, n4); 
+          IntegratedSnarlFinder snarl_finder(graph);
+          SnarlDistanceIndex distance_index;
+          fill_in_distance_index(&distance_index, &graph, &snarl_finder, 1); // NOTE(review): 4th arg 1 presumably is a size limit that makes the snarl count as oversized - confirm
+
+          REQUIRE(distance_index.minimum_distance(2, false, 0, 3, false, 0, false, &graph) == 1); // node 2 ("T", 1 bp) forward offset 0 to node 3 forward offset 0
+        } 
+
         TEST_CASE( "Oversized snarl","[snarl_distance]" ) {
             VG graph;
          
@@ -7372,6 +7451,9 @@ namespace vg {
         }
 
 
+        // TODO: This test case doesn't do anything (runs 0 iterations).
+        // When I tell it to actually run iterations, it fails.
+        // Has it ever worked?
         TEST_CASE("random test subgraph", "[snarl_distance][snarl_distance_subgraph]") {
 
             int64_t min = 20; int64_t max = 50;
@@ -7480,7 +7562,7 @@ namespace vg {
                                      << distance_index.minimum_distance(nodeID1, false, 0, node_id, true, 0)
                                      << " (" << dist_start_fd << " " << dist_end_fd << " " << dist_start_bk << " " << dist_end_bk << ") "
                                      << " is in the subgraph but shouldn't be " << endl;
-                                graph.serialize_to_file("test_graph.vg");
+                                vg::io::save_handle_graph(&graph, "test_graph.vg");
                             }
                             REQUIRE((start_forward || end_forward || in_forward || start_backward || end_backward || in_backward));
                         } else {
@@ -7491,7 +7573,7 @@ namespace vg {
                                      << distance_index.minimum_distance(nodeID1, false, 0,node_id, true, 0)
                                      << " (" << dist_start_fd << " " << dist_end_fd << " " << dist_start_bk << " " << dist_end_bk << ") "
                                      << " is not in the subgraph but should be " << endl;
-                                graph.serialize_to_file("test_graph.vg");
+                                vg::io::save_handle_graph(&graph, "test_graph.vg");
                                 REQUIRE(!(start_forward || end_forward || in_forward || start_backward || end_backward || in_backward));
                             }
                         }
@@ -7556,31 +7638,49 @@ namespace vg {
         */
         
         TEST_CASE( "Distance index can traverse all the snarls in random graphs",
-                  "[snarl_distance_random]" ) {
+                  "[snarl_distance][snarl_distance_random]" ) {
         
             // Each actual graph takes a fairly long time to do so we randomize sizes...
             
-            default_random_engine generator(test_seed_source());
+            std::default_random_engine generator(test_seed_source());
             
             for (size_t repeat = 0; repeat < 1000; repeat++) {
             
-                uniform_int_distribution<size_t> bases_dist(100, 1000);
+                std::uniform_int_distribution<size_t> bases_dist(100, 1000);
                 size_t bases = bases_dist(generator);
-                uniform_int_distribution<size_t> variant_bases_dist(1, bases/20);
+                std::uniform_int_distribution<size_t> variant_bases_dist(1, bases/20);
                 size_t variant_bases = variant_bases_dist(generator);
-                uniform_int_distribution<size_t> variant_count_dist(1, bases/30);
+                std::uniform_int_distribution<size_t> variant_count_dist(1, bases/30);
                 size_t variant_count = variant_count_dist(generator);
+                
+                std::uniform_real_distribution<double> flip_dist(0.0, 1.0);
+                double node_flip_fraction = flip_dist(generator);
+                double chain_flip_fraction = flip_dist(generator);
 
-                uniform_int_distribution<size_t> snarl_size_limit_dist(500, 1000);
+                std::uniform_int_distribution<size_t> snarl_size_limit_dist(2, 1000);
                 size_t size_limit = snarl_size_limit_dist(generator);
-                        
+
 #ifdef debug
-                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events" << endl;
+                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events with " << node_flip_fraction << " nodes flipped and " << chain_flip_fraction << " of chains flipped, with size limit " << size_limit << endl;
 #endif
-            
-                VG graph;
-                random_graph(bases, variant_bases, variant_count, &graph);
-                IntegratedSnarlFinder finder(graph); 
+               
+                // Generate a base graph
+                VG base_graph;
+                random_graph(bases, variant_bases, variant_count, &base_graph);
+                
+                // Flip some fraction of the nodes to their local reverse orientation
+                bdsg::HashGraph graph = randomly_flipped_nodes(base_graph, node_flip_fraction, generator);
+
+                // Find snarls
+                IntegratedSnarlFinder base_finder(graph);
+
+                // Flip some fraction of the chains to their opposite orientation.
+                // Note that we can't flip the snarls because the snarl decomposition
+                // requires snarls to be articulated as forward along their
+                // chains.
+                SnarlDecompositionFuzzer finder(&graph, &base_finder, chain_flip_fraction, generator);
+                
+                // Build the index
                 SnarlDistanceIndex distance_index;
                 fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
 
@@ -7640,7 +7740,7 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         if (max_distance < snarl_distance){
@@ -7648,11 +7748,10 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "minimum: " << snarl_distance << " maximum: " << max_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
-                            graph.serialize_to_file("test_graph.vg");
                         if (!traceback.first.empty() && ! traceback.second.empty()) {
                             size_t traceback_distance = 0;
                             for (auto x : traceback.first){
@@ -7699,7 +7798,7 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
@@ -7766,9 +7865,179 @@ namespace vg {
                 */
                     
             }
-        
-            
-        } 
+
+
+        }
+
+        TEST_CASE( "Distance index hub labeling matches Dijkstra on random graphs",
+                  "[snarl_distance][snarl_distance_random_hub_labels]" ) {
+
+            // Force hub labeling on essentially every snarl by pinning size_limit
+            // to 1, and bias graphs toward snarlier topologies so the hub-label
+            // path actually has interior nodes to label.
+
+            std::default_random_engine generator(test_seed_source());
+
+            for (size_t repeat = 0; repeat < 1000; repeat++) {
+
+                std::uniform_int_distribution<size_t> bases_dist(200, 1500);
+                size_t bases = bases_dist(generator);
+                std::uniform_int_distribution<size_t> variant_bases_dist(1, bases/15);
+                size_t variant_bases = variant_bases_dist(generator);
+                std::uniform_int_distribution<size_t> variant_count_dist(bases/15, bases/8);
+                size_t variant_count = variant_count_dist(generator);
+
+                std::uniform_real_distribution<double> flip_dist(0.0, 1.0);
+                double node_flip_fraction = flip_dist(generator);
+                double chain_flip_fraction = flip_dist(generator);
+
+                // Snarls bigger than this limit are meant to take the populate_hub_labeling branch.
+                const size_t size_limit = 1;
+
+#ifdef debug
+                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events with " << node_flip_fraction << " nodes flipped and " << chain_flip_fraction << " of chains flipped, with size limit " << size_limit << endl;
+#endif
+
+                // Generate a base graph
+                VG base_graph;
+                random_graph(bases, variant_bases, variant_count, &base_graph);
+
+                // Flip some fraction of the nodes to their local reverse orientation
+                bdsg::HashGraph graph = randomly_flipped_nodes(base_graph, node_flip_fraction, generator);
+
+                // Find snarls
+                IntegratedSnarlFinder base_finder(graph);
+
+                // Flip some fraction of the chains to their opposite orientation.
+                SnarlDecompositionFuzzer finder(&graph, &base_finder, chain_flip_fraction, generator);
+
+                // Build the index
+                SnarlDistanceIndex distance_index;
+                fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
+
+                //Make sure that the distance index found all the nodes
+                for (id_t id = graph.min_node_id() ; id <= graph.max_node_id() ; id++) {
+                    if (graph.has_node(id)) {
+                        handle_t handle = graph.get_handle(id);
+                        REQUIRE(graph.get_length(handle) ==
+                                distance_index.node_length(distance_index.get_net(handle, &graph)));
+                    }
+                }
+
+                for (size_t repeat_positions = 0 ; repeat_positions < 500 ; repeat_positions++) {
+                    //Pick random pairs of positions and find the distance between them
+                    id_t node_id1 = 0;
+                    id_t node_id2 = 0;
+                    uniform_int_distribution<int> random_node_ids(graph.min_node_id(),graph.max_node_id());
+                    // Draw from the test-wide generator; do not shadow and re-seed it here, so each pair continues one random stream.
+                    while (node_id1 == 0) {
+                        id_t new_id = random_node_ids(generator);
+                        if (graph.has_node(new_id)) {
+                            node_id1 = new_id;
+                        }
+                    }
+                    while (node_id2 == 0) {
+                        id_t new_id = random_node_ids(generator);
+                        if (graph.has_node(new_id)) {
+                            node_id2 = new_id;
+                        }
+                    }
+
+                    REQUIRE(graph.has_node(node_id1));
+                    REQUIRE(graph.has_node(node_id2));
+
+
+                    offset_t offset1 = uniform_int_distribution<int>(0,graph.get_length(graph.get_handle(node_id1)) - 1)(generator);
+                    offset_t offset2 = uniform_int_distribution<int>(0,graph.get_length(graph.get_handle(node_id2)) - 1)(generator);
+                    bool rev1 = uniform_int_distribution<int>(0,1)(generator) == 0;
+                    bool rev2 = uniform_int_distribution<int>(0,1)(generator) == 0;
+
+
+                    handle_t handle1 = graph.get_handle(node_id1, rev1);
+                    handle_t handle2 = graph.get_handle(node_id2, rev2);
+
+
+                    //Find actual distance
+                    size_t dijkstra_distance = std::numeric_limits<size_t>::max();
+                    if (node_id1 == node_id2 && offset1 <= offset2 && rev1 == rev2) {
+                        dijkstra_distance = offset2 - offset1;
+
+                        pair<vector<tuple<net_handle_t, int32_t, int32_t>>,vector<tuple<net_handle_t, int32_t, int32_t>>> traceback;
+                        size_t snarl_distance = distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph, &traceback);
+                        size_t max_distance = distance_index.maximum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2);
+                        if (snarl_distance != dijkstra_distance){
+                            cerr << "Failed random hub-label test" << endl;
+                            cerr << "Snarl size limit: " << size_limit << endl;
+                            cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
+                            cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
+                            cerr << "serializing graph to test_graph.vg" << endl;
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
+                            REQUIRE(false);
+                        }
+                        if (max_distance < snarl_distance){
+                            cerr << "Failed random hub-label test" << endl;
+                            cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
+                            cerr << "minimum: " << snarl_distance << " maximum: " << max_distance << endl;
+                            cerr << "serializing graph to test_graph.vg" << endl;
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
+                            REQUIRE(false);
+                        }
+                        REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
+                        if (!traceback.first.empty() && ! traceback.second.empty()) {
+                            size_t traceback_distance = 0;
+                            for (auto x : traceback.first){
+                                if (std::get<1>(x) != std::numeric_limits<int32_t>::max() && std::get<1>(x) != std::numeric_limits<int32_t>::min()) {
+                                    traceback_distance += std::abs(std::get<1>(x));
+                                } else if (std::get<2>(x) != std::numeric_limits<int32_t>::max() && std::get<2>(x) != std::numeric_limits<int32_t>::min()){
+                                    traceback_distance += std::abs(std::get<2>(x));
+                                }
+                            }
+                            for (size_t i = 0 ; i < traceback.second.size()-1 ; i++) {
+                                auto x = traceback.second[i];
+
+                                if (std::get<1>(x) != std::numeric_limits<int32_t>::max() && std::get<1>(x) != std::numeric_limits<int32_t>::min()) {
+                                    traceback_distance += std::abs(std::get<1>(x));
+                                } else if (std::get<2>(x) != std::numeric_limits<int32_t>::max() && std::get<2>(x) != std::numeric_limits<int32_t>::min()){
+                                    traceback_distance += std::abs(std::get<2>(x));
+                                }
+                            }
+                            REQUIRE(snarl_distance == traceback_distance);
+                        } else {
+                            REQUIRE(snarl_distance == std::numeric_limits<size_t>::max());
+                        }
+
+                    } else if (node_id1 == node_id2 ) {
+                        //TODO: The dijkstra algorithm won't visit the start node twice
+                    } else {
+                        bool first = true;
+                        handlegraph::algorithms::dijkstra(&graph, handle1, [&](const handle_t& reached, size_t distance) {
+                            if (reached == handle2 && ! first) {
+                                dijkstra_distance = distance;
+                                dijkstra_distance += graph.get_length(graph.get_handle(node_id1)) - offset1;
+                                dijkstra_distance += offset2;
+                                return false;
+                            }
+                            first = false;
+                            return true;
+                        }
+                        , false);
+
+                        size_t snarl_distance = distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph);
+                        if (snarl_distance != dijkstra_distance){
+                            cerr << "Failed random hub-label test" << endl;
+                            cerr << "Snarl size limit: " << size_limit << endl;
+                            cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
+                            cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
+                            cerr << "serializing graph to test_graph.vg" << endl;
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
+                            REQUIRE(false);
+                        }
+                        REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
+                    }
+                }
+            }
+        }
+
         //TEST_CASE("Failed unit test", "[failed]") {
         //    //Load failed random graph
         //    ifstream vg_stream("test_graph.hg");
@@ -7789,8 +8058,372 @@ namespace vg {
         //                return true;
         //            });
         //}
+        
+        TEST_CASE( "Distance index can query a troublesome oversized snarl",
+                  "[snarl_distance]" ) {
+            // Small graph of five single-base nodes, indexed with a snarl size limit of 2.
+            std::string snarl_json = R"({
+                "node": [
+                    {"id": "19","sequence": "A"},
+                    {"id": "20","sequence": "A"},
+                    {"id": "21","sequence": "A"},
+                    {"id": "22","sequence": "A"},
+                    {"id": "23","sequence": "A"} 
+                ], "edge": [
+                    {"from": "19","to": "20"},
+                    {"from": "19","to": "22"},
+                    {"from": "20","to": "21"},
+                    {"from": "20","to": "23"},
+                    {"from": "21","to": "22"}, 
+                    {"from": "22","to": "23"}
+                ]
+            })";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(snarl_json, &graph);
+
+            IntegratedSnarlFinder finder(graph);
+            SnarlDistanceIndex index;
+            fill_in_distance_index(&index, &graph, &finder, 2);
+
+            // Query from offset 0 of node 19 forward to offset 0 of node 23 forward.
+            id_t from_id = 19; bool from_rev = false; size_t from_offset = 0;
+            id_t to_id = 23; bool to_rev = false; size_t to_offset = 0;
+            handle_t from_handle = graph.get_handle(from_id, from_rev);
+            handle_t to_handle = graph.get_handle(to_id, to_rev);
+
+            // Ground truth: Dijkstra from the source handle, stopping once the target handle is reached.
+            size_t expected = std::numeric_limits<size_t>::max();
+            handlegraph::algorithms::dijkstra(&graph, from_handle, [&](const handle_t& reached, size_t distance) {
+                if (reached != to_handle) {
+                    return true;
+                }
+                expected = distance + (graph.get_length(graph.get_handle(from_id)) - from_offset) + to_offset;
+                return false;
+            }, false);
+
+            // The index has to agree with the graph search.
+            size_t indexed = index.minimum_distance(from_id, from_rev, from_offset, to_id, to_rev, to_offset, false, &graph);
+            REQUIRE(indexed == expected);
+        }
+
+        TEST_CASE( "Distance index can query out of a SNP with a reversing allele as an oversided snarl",
+                  "[snarl_distance]" ) {
+
+            // This is a snarl from 1 to 2, where 4 and 5 are a SNP, and 3
+            // lets you double back to the start
+            std::string graph_json = R"({
+                "node": [
+                    {"id": "1","sequence": "AAAAA"},
+                    {"id": "2","sequence": "AAAAA"},
+                    {"id": "3","sequence": "A"},
+                    {"id": "4","sequence": "A"},
+                    {"id": "5","sequence": "A"} 
+                ], "edge": [
+                    {"from": "1","to": "3"},
+                    {"from": "1","to": "4"},
+                    {"from": "1","to": "5"},
+                    {"from": "3","to": "1", "to_end": true},
+                    {"from": "4","to": "2"}, 
+                    {"from": "5","to": "2"}
+                ]
+            })";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+
+            // We want to be able to get out of the snarl from node 4, which we definitely can.
+            id_t node_id1 = 4; bool rev1 = false ; size_t offset1 = 1;
+            id_t node_id2 = 2; bool rev2 = false ; size_t offset2 = 0;
+            // Offset 1 on the 1 bp node 4 is past its only base, and node 4 has an edge
+            // straight to node 2, so no bases lie between the two positions.
+
+            // The expected distance is known by construction, not computed by a graph search.
+            size_t true_distance = 0;
+
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+
+            // And out of the snarl to the left from 3 reverse to 1 reverse should also be 0
+            node_id1 = 3; rev1 = true; offset1 = 1;
+            node_id2 = 1; rev2 = true; offset2 = 0;
+            true_distance = 0;
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+
+        }
+
+        TEST_CASE( "Distance index can query within a fiddly snarl",
+                  "[snarl_distance]" ) {
+            // Hand-written graph, indexed with a snarl size limit of 2, queried between two adjacent node sides.
+            std::string graph_json = R"({"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "3", "to_end": true}, {"from": "1", "to": "4"}, {"from": "1", "to": "5"}, {"from": "4", "to": "5", "to_end": true}, {"from": "2", "from_start": true, "to": "4", "to_end": true}], "node": [{"id": "5", "sequence": "A"}, {"id": "1", "sequence": "AAAAA"}, {"id": "4", "sequence": "A"}, {"id": "2", "sequence": "AAAAA"}, {"id": "3", "sequence": "A"}]})";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+
+            id_t node_id1 = 4; bool rev1 = false ; size_t offset1 = 1;
+            id_t node_id2 = 5; bool rev2 = true ; size_t offset2 = 0;
+            // Node 4's end is joined directly to node 5's end, so leaving 4 forward
+            // arrives immediately at 5 in reverse with no bases in between.
+
+            // The expected distance is known by construction, not computed by a graph search.
+            size_t true_distance = 0;
+
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+        }
+
+        TEST_CASE( "Distance index can query into a child snarl in reverse",
+                  "[snarl_distance]" ) {
+            // Fixed graph of single-base nodes; the index answer is checked against a Dijkstra search.
+            std::string graph_json = R"({"node":[{"id":"79","sequence":"A"},{"id":"16","sequence":"A"},{"id":"60","sequence":"A"},{"id":"37","sequence":"A"},{"id":"40","sequence":"A"},{"id":"53","sequence":"A"},{"id":"59","sequence":"A"},{"id":"63","sequence":"A"},{"id":"18","sequence":"A"},{"id":"38","sequence":"A"},{"id":"62","sequence":"A"}],"edge":[{"from":"16","to":"53"},{"from":"16","from_start":true,"to":"79","to_end":true},{"from":"60","to":"62"},{"from":"60","from_start":true,"to":"79","to_end":true},{"from":"37","from_start":true,"to":"63","to_end":true},{"from":"37","from_start":true,"to":"40"},{"from":"53","to":"60"},{"from":"59","to":"63"},{"from":"59","from_start":true,"to":"60","to_end":true},{"from":"18","to":"53"},{"from":"18","to":"38"},{"from":"18","from_start":true,"to":"79","to_end":true},{"from":"18","from_start":true,"to":"37","to_end":true},{"from":"38","to":"63","to_end":true},{"from":"38","to":"40"},{"from":"62","to":"63"}]})";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder finder(graph);
+            SnarlDistanceIndex index;
+            fill_in_distance_index(&index, &graph, &finder, 2);
+
+            id_t from_id = 16; bool from_rev = false; size_t from_offset = 1;
+            id_t to_id = 62; bool to_rev = true; size_t to_offset = 0;
+            handle_t from_handle = graph.get_handle(from_id, from_rev);
+            handle_t to_handle = graph.get_handle(to_id, to_rev);
+
+            // Ground truth distance, left at max() if the target handle is never reached.
+            size_t expected = std::numeric_limits<size_t>::max();
+            // Visitor that records the distance and stops the search at the target handle.
+            auto stop_at_target = [&](const handle_t& reached, size_t distance) {
+                if (reached != to_handle) {
+                    // Not there yet; keep expanding.
+                    return true;
+                }
+                expected = distance + (graph.get_length(graph.get_handle(from_id)) - from_offset) + to_offset;
+                return false;
+            };
+            handlegraph::algorithms::dijkstra(&graph, from_handle, stop_at_target, false);
+
+            size_t indexed = index.minimum_distance(from_id, from_rev, from_offset, to_id, to_rev, to_offset, false, &graph);
+
+            REQUIRE(indexed == expected);
+        }
+
+
+        TEST_CASE( "Distance index can query all possible 3-node-with-legs snarls",
+                 "[snarl_distance]" ) {
+
+            // We're going to generate all possible snarls you can get by
+            // starting with the boundary nodes, taking up to 3 nodes and
+            // connecting them, one nodeside at a time, onto the existing
+            // nodes.
+            //
+            // Combinatorics says this is a manageable number; each nodeside
+            // picks from one of the previous nodesides and attaches to it.
+
+            /// Call the callback with each possible combination of choices of
+            /// previous items.
+            ///
+            /// start_size is the number of items present before we start
+            /// making choices; the first entry can choose from start_size
+            /// items.
+            ///
+            /// end_size is the total number of items to think about, including
+            /// those in start_size.
+            ///
+            /// Calls the callback with all possible vectors of length
+            /// (end_size - start_size) matching these constraints.
+            auto for_all_choice_combinations = [](size_t start_size, size_t end_size, const std::function<void(const std::vector<size_t>&)>& callback) {
+                // Precondition: end_size > start_size, since choices.back() below needs a non-empty vector.
+                std::vector<size_t> choices(end_size - start_size, 0);
+                while (true) {
+#ifdef debug
+                    std::cerr << "Consider combination:";
+                    for (auto& item : choices) {
+                        std::cerr << " " << item;
+                    }
+                    std::cerr << std::endl;
+#endif
+                    callback(choices);
+                    choices.back()++;
+                    for (size_t i = end_size - 1; i >= start_size; i--) {
+                        if (choices.at(i - start_size) >= i) {
+                            // We've reached the point where we want to pick from a
+                            // choice not available at this point.
+                            // At i=2 we can choose between 0 and 1, so we carry at i.
+                            if (i == start_size) {
+                                // We've counted all possibilities
+                                return;
+                            } else {
+                                // Carry and reset to 0.
+                                choices.at(i - start_size - 1)++;
+                                choices.at(i - start_size) = 0;
+                            }
+                        } else {
+                            // No more carrying to do
+                            break;
+                        }
+                    }
+                }
+            };
+
+            // How big should a snarl be allowed to be before being oversized?
+            size_t size_limit = 2;
+            // How many content nodes should be inside the snarl?
+            const size_t MAX_NODES = 3;
+            // How many node sides do we need to worry about, including the boundary sentinels?
+            size_t max_node_sides = MAX_NODES * 2 + 2;
+            for_all_choice_combinations(2, max_node_sides, [&](const std::vector<size_t>& choices) {
+                // Build the choices into a graph.
+
+                bdsg::HashGraph graph;
+                // Make the bounding nodes heavy so they are likely to root the snarl
+                handle_t start_node = graph.create_handle("AAAAA");
+                handle_t end_node = graph.create_handle("AAAAA");
+
+                std::vector<handle_t> connect_to;
+                connect_to.reserve(max_node_sides);
+                // Choice 0 is start node, arriving reading out
+                connect_to.push_back(graph.flip(start_node));
+                // Choice 1 is end node reading out
+                connect_to.push_back(end_node);
+
+                for (size_t i = 0; i < choices.size(); i += 2) {
+                    // Make a node
+                    handle_t new_node = graph.create_handle("A");
+                    // Make sure to remember it so it can choose itself
+                    connect_to.push_back(new_node);
+                    connect_to.push_back(graph.flip(new_node));
+                    // Connect its left and right to each pair of choices.
+                    graph.create_edge(graph.flip(new_node), connect_to.at(choices.at(i)));
+                    graph.create_edge(new_node, connect_to.at(choices.at(i + 1)));
+                }
+
+                // TODO: It might be more efficient to un-build the things that
+                // change between graphs instead of rebuilding from scratch for
+                // every case.
+
+                // Skip graphs where the choices mean the graph isn't actually
+                // connected, because then it can't be recognized as a snarl
+                // probably.
+                std::vector<std::unordered_set<nid_t>> components = handlegraph::algorithms::weakly_connected_components(&graph);
+                if (components.size() > 1) {
+                    return;
+                }
+
+                // Now index the graph for query
+                IntegratedSnarlFinder finder(graph);
+                SnarlDistanceIndex distance_index;
+                fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
+
+                // Compute the truth all-to-all distances, between outgoing
+                // side of first handle and incoming side of second.
+                // Both handles are oriented along the connecting path.
+                // TODO: We compute/store both triangles of the matrix; can we avoid one somehow?
+                std::unordered_map<handle_t, std::unordered_map<handle_t, size_t>> dijkstra_distances;
+                graph.for_each_handle([&](const handle_t& base) {
+                    for (const handle_t& here : {base, graph.flip(base)}) {
+                        if (here == graph.flip(start_node) || here == end_node) {
+                            // Skip only this traversal, which looks out of the snarl; the node's other orientation is still searched.
+                            continue;
+                        }
+                        dijkstra_distances.emplace(here, handlegraph::algorithms::find_shortest_paths(&graph, here));
+                    }
+                });
+
+                // The Dijkstra traversal always sees a handle to itself at
+                // distance 0. We need to get the real back-to-self distance,
+                // if any, and fill that in.
+                graph.for_each_handle([&](const handle_t& base) {
+                    for (const handle_t& here : {base, graph.flip(base)}) {
+                        if (here == graph.flip(start_node) || here == end_node) {
+                            // Skip only this traversal, which looks out of the snarl; the node's other orientation is still fixed up.
+                            continue;
+                        }
+
+                        // The place we need to arrive at is ourselves, since
+                        // both start and end are oriented along the connecting
+                        // path here.
+
+                        size_t loop_distance = std::numeric_limits<size_t>::max();
+                        // See if we can get back here from any of the places we can get
+                        graph.follow_edges(here, false, [&](const handle_t next) {
+                            if (next == here) {
+                                // We found a real self loop
+                                loop_distance = 0;
+                                return false;
+                            }
+                            auto found_index = dijkstra_distances.find(next);
+                            if (found_index == dijkstra_distances.end()) {
+                                // This destination has no recorded search (e.g. it looks out of the snarl),
+                                // so we can't route a loop back through it.
+                                return true;
+                            }
+                            auto found_distance = found_index->second.find(here);
+                            if (found_distance == found_index->second.end()) {
+                                // This destination can't get back to us
+                                return true;
+                            }
+                            // If we find a way back, min in its distance.
+                            loop_distance = std::min(loop_distance, graph.get_length(next) + found_distance->second);
+                            return true;
+                        });
+
+#ifdef debug
+                        std::cerr << "Real self loop distance for " << graph.get_id(here) << (graph.get_is_reverse(here) ? "rev" : "fd") << " -> " << graph.get_id(here) << (graph.get_is_reverse(here) ? "rev" : "fd") << " is " << loop_distance << std::endl;
+#endif
+
+                        if (loop_distance == std::numeric_limits<size_t>::max()) {
+                            // There's really no way back from this node to itself in the same orientation. Delete the entry the Dijkstra search adds.
+                            dijkstra_distances.at(here).erase(here);
+                        } else {
+                            // There is a way back; store the value.
+                            dijkstra_distances.at(here)[here] = loop_distance;
+                        }
+                    }
+                });
+
+#ifdef debug
+                for (auto& [start_handle, distances] : dijkstra_distances) {
+                    for (auto& [end_handle, dijkstra_distance] : distances) {
+                        cerr << "Dijkstra sees: " << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << " = " << dijkstra_distance << endl;
+                    }
+                }
+#endif
+
+                // Now query all of the distances against the index
+                for (auto& [start_handle, distances] : dijkstra_distances) {
+                    for (auto& [end_handle, dijkstra_distance] : distances) {
+                        // Ask for distance between outgoing side of first handle and incoming side of second.
+
+#ifdef debug
+                        cerr << "Measure: " << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << endl;
+#endif
+
+                        size_t snarl_distance = distance_index.minimum_distance(graph.get_id(start_handle), graph.get_is_reverse(start_handle), graph.get_length(start_handle), graph.get_id(end_handle), graph.get_is_reverse(end_handle), 0, false, &graph);
+
+                        if (snarl_distance != dijkstra_distance) {
+                            cerr << "Failed exhaustive test" << endl;
+                            cerr << "Snarl size limit: " << size_limit << endl;
+                            cerr << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << endl;
+                            cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
+                            cerr << "serializing graph to test_graph.vg" << endl;
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
+                        }
+                        REQUIRE(snarl_distance == dijkstra_distance);
+                    }
+                }
+            });
+
+        }
+        
+
         TEST_CASE( "random minimum distance paths",
-                  "[snarl_distance_random_paths]" ) {
+                  "[snarl_distance][snarl_distance_random_paths]" ) {
         
             // Each actual graph takes a fairly long time to do so we randomize sizes...
             
@@ -7809,7 +8442,7 @@ namespace vg {
                 size_t size_limit = snarl_size_limit_dist(generator);
                         
 #ifdef debug
-                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events" << endl;
+                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events with size limit " << size_limit << endl;
 #endif
             
                 VG graph;
@@ -7818,7 +8451,7 @@ namespace vg {
                 SnarlDistanceIndex distance_index;
                 fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
 
-                graph.serialize_to_file("test_graph.vg");
+                vg::io::save_handle_graph(&graph, "test_graph.vg");
                 for (size_t repeat_positions = 0 ; repeat_positions < 500 ; repeat_positions++) {
                     //Pick random pairs of positions and find the distance between them
                     id_t node_id1 = 0;
diff --git a/src/unittest/snarls.cpp b/src/unittest/snarls.cpp
index c2f5030326..c7edf85b05 100644
--- a/src/unittest/snarls.cpp
+++ b/src/unittest/snarls.cpp
@@ -9,6 +9,8 @@
 #include <sstream>
 #include <set>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "catch.hpp"
 #include "support/random_graph.hpp"
@@ -1697,14 +1699,12 @@ namespace vg {
                              ]
                 }
                 )";
-                
+
                 VG graph;
-                
+
                 // Load up the graph
-                Graph g;
-                json2pb(g, graph_json.c_str(), graph_json.size());
-                graph.extend(g);
-                
+                vg::io::json2graph(graph_json, &graph);
+
                 // Define the one snarl
                 Snarl snarl1;
                 snarl1.mutable_start()->set_node_id(6462830);
@@ -1830,14 +1830,12 @@ namespace vg {
                 string snarl1_json = R"({"type": 1, "end": {"node_id": 187208}, "start": {"node_id": 178894}})";
                 string snarl2_json = R"({"type": 1, "end": {"node_id": 187209, "backward": true}, "start": {"node_id": 178895, "backward": true}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
                 string snarl3_json = R"({"type": 1, "end": {"node_id": 178896}, "start": {"node_id": 178895}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
-                
+
                 VG graph;
-                
+
                 // Load up the graph
-                Graph g;
-                json2pb(g, graph_json.c_str(), graph_json.size());
-                graph.extend(g);
-                
+                vg::io::json2graph(graph_json, &graph);
+
                 // Load the snarls
                 Snarl snarl1, snarl2, snarl3;
                 json2pb(snarl1, snarl1_json.c_str(), snarl1_json.size());
@@ -1917,13 +1915,11 @@ namespace vg {
             }
             
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             // We need to see the path.
             REQUIRE(graph.paths.size() == 1);
             
@@ -2045,10 +2041,8 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
 
 #ifdef debug
@@ -2061,7 +2055,7 @@ namespace vg {
                 cerr << endl;
             });
 #endif
-        
+
             SECTION("Root node has 1 child bubble") {
                 REQUIRE(snarl_manager.top_level_snarls().size() == 1);
                 
@@ -2127,15 +2121,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2246,15 +2238,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2354,18 +2344,16 @@ namespace vg {
                     {"from": 2, "to": 4},
                     {"from": 2, "to": 3},
                     {"from": 2, "to": 2},
-                    {"from": 3, "to": 3}            
+                    {"from": 3, "to": 3}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2415,15 +2403,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2490,18 +2476,16 @@ namespace vg {
                 "edge": [
                     {"from": 1, "to": 2},
                     {"from": 2, "to": 1}
-                    
+
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2555,15 +2539,13 @@ namespace vg {
                     {"from": 3, "to": 6}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2767,15 +2749,13 @@ namespace vg {
                     {"from": 9, "to": 10}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
 #ifdef debug
             snarl_manager.for_each_snarl_preorder([&](const Snarl* snarl) {
@@ -3919,14 +3899,12 @@ namespace vg {
                             {"position": {"node_id": 7, "is_reverse" : "true"}, "rank" : 5 }
                         ]}
                     ]
-                }            
+                }
                 )";
-                
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             assert(graph.is_valid());
             
             SECTION( "PathTraversalFinder can find simple forward traversals") {
diff --git a/src/unittest/source_sink_overlay.cpp b/src/unittest/source_sink_overlay.cpp
index 4c0ecbc20f..bf2aa3bc13 100644
--- a/src/unittest/source_sink_overlay.cpp
+++ b/src/unittest/source_sink_overlay.cpp
@@ -10,7 +10,8 @@
 #include "../source_sink_overlay.hpp"
 #include "../kmer.hpp"
 #include "../vg.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 #include <iostream>
 #include <vector>
@@ -132,11 +133,9 @@ TEST_CASE("SourceSinkOverlay adds a source and a sink to a 1-node graph", "[over
 
 TEST_CASE("SourceSinkOverlay agrees with VG::add_start_end_markers in a tiny graph", "[overlay]") {
     const string graph_json = R"({"node":[{"sequence":"CAAATAAG","id":"1"},{"sequence":"A","id":"2"},{"sequence":"G","id":"3"},{"sequence":"T","id":"4"},{"sequence":"C","id":"5"},{"sequence":"TTG","id":"6"},{"sequence":"A","id":"7"},{"sequence":"G","id":"8"},{"sequence":"AAATTTTCTGGAGTTCTAT","id":"9"},{"sequence":"A","id":"10"},{"sequence":"T","id":"11"},{"sequence":"ATAT","id":"12"},{"sequence":"A","id":"13"},{"sequence":"T","id":"14"},{"sequence":"CCAACTCTCTG","id":"15"}],"edge":[{"from":"1","to":"2"},{"from":"1","to":"3"},{"from":"2","to":"4"},{"from":"2","to":"5"},{"from":"3","to":"4"},{"from":"3","to":"5"},{"from":"4","to":"6"},{"from":"5","to":"6"},{"from":"6","to":"7"},{"from":"6","to":"8"},{"from":"7","to":"9"},{"from":"8","to":"9"},{"from":"9","to":"10"},{"from":"9","to":"11"},{"from":"10","to":"12"},{"from":"11","to":"12"},{"from":"12","to":"13"},{"from":"12","to":"14"},{"from":"13","to":"15"},{"from":"14","to":"15"}],"path":[{"name":"x","mapping":[{"position":{"node_id":"1"},"edit":[{"from_length":8,"to_length":8}],"rank":"1"},{"position":{"node_id":"3"},"edit":[{"from_length":1,"to_length":1}],"rank":"2"},{"position":{"node_id":"5"},"edit":[{"from_length":1,"to_length":1}],"rank":"3"},{"position":{"node_id":"6"},"edit":[{"from_length":3,"to_length":3}],"rank":"4"},{"position":{"node_id":"8"},"edit":[{"from_length":1,"to_length":1}],"rank":"5"},{"position":{"node_id":"9"},"edit":[{"from_length":19,"to_length":19}],"rank":"6"},{"position":{"node_id":"11"},"edit":[{"from_length":1,"to_length":1}],"rank":"7"},{"position":{"node_id":"12"},"edit":[{"from_length":4,"to_length":4}],"rank":"8"},{"position":{"node_id":"14"},"edit":[{"from_length":1,"to_length":1}],"rank":"9"},{"position":{"node_id":"15"},"edit":[{"from_length":11,"to_length":11}],"rank":"10"}]}]})";
-    
-    Graph graph;
-    json2pb(graph, graph_json);
-    
-    VG produced(graph);
+
+    VG produced;
+    vg::io::json2graph(graph_json, &produced);
     
     id_t highest_id = produced.max_node_id();
     id_t start_id = highest_id + 1;
diff --git a/src/unittest/support/random_graph.hpp b/src/unittest/support/random_graph.hpp
index 7597beeab9..e3e812d265 100644
--- a/src/unittest/support/random_graph.hpp
+++ b/src/unittest/support/random_graph.hpp
@@ -1,11 +1,16 @@
+#ifndef VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
+#define VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
+/** \file random_graph.hpp
+ * Utilities for randomizing graphs for test cases.
+ */
+
+
 #include "handle.hpp"
 #include <vector>
 
-#ifndef VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
-#define VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
 
-namespace vg{
-namespace unittest{
+namespace vg {
+namespace unittest {
 
 /// Create a random graph by adding variation to a sequence of length seq_size
 /// variant_len is the mean length of a larger variation and variant_count
diff --git a/src/unittest/support/randomly_flipped_nodes.hpp b/src/unittest/support/randomly_flipped_nodes.hpp
new file mode 100644
index 0000000000..40b00bda26
--- /dev/null
+++ b/src/unittest/support/randomly_flipped_nodes.hpp
@@ -0,0 +1,83 @@
+#ifndef VG_UNITTEST_RANDOMLY_FLIPPED_NODES_HPP_INCLUDED
+#define VG_UNITTEST_RANDOMLY_FLIPPED_NODES_HPP_INCLUDED
+
+/**
+ * \file randomly_flipped_nodes.hpp
+ * Utility for creating a copy of a HandleGraph with a random subset of nodes
+ * flipped in orientation.
+ */
+
+#include <random>
+#include <bdsg/hash_graph.hpp>
+#include "handle.hpp"
+
+namespace vg {
+namespace unittest {
+
+/**
+ * Copy source into a new HashGraph, reversing the local forward orientation
+ * of a random subset of its nodes. A flipped node stores the reverse
+ * complement of its sequence as its forward sequence, and every edge end that
+ * attached to one orientation of it attaches to the other orientation instead.
+ *
+ * \tparam URNG type of the random generator, as accepted by
+ *         std::uniform_real_distribution.
+ * \param source the graph to copy.
+ * \param p_flip probability with which each node is flipped, independently.
+ * \param generator source of randomness; the distribution is sampled once per
+ *        node.
+ * \return the copy, which uses the same node IDs as source.
+ */
+template<typename URNG>
+bdsg::HashGraph randomly_flipped_nodes(const HandleGraph& source, double p_flip, URNG& generator) {
+    // The copy being built.
+    bdsg::HashGraph flipped_copy;
+
+    // IDs of the nodes whose orientation is reversed in the copy.
+    std::unordered_set<nid_t> flipped_ids;
+
+    // Sampled once per node to decide whether that node is flipped.
+    std::uniform_real_distribution<double> coin(0.0, 1.0);
+
+    // Pass 1: recreate every node under its original ID.
+    source.for_each_handle([&](const handle_t& handle) {
+        const nid_t id = source.get_id(handle);
+        const bool flip_this_node = coin(generator) < p_flip;
+
+        // A flipped node takes the sequence of the source node's opposite
+        // orientation as its own forward sequence.
+        const handle_t sequence_donor = flip_this_node ? source.flip(handle) : handle;
+        flipped_copy.create_handle(source.get_sequence(sequence_donor), id);
+
+        if (flip_this_node) {
+            flipped_ids.insert(id);
+        }
+    });
+
+    // Translate a handle in source into the handle in the copy for the same
+    // side of the same node: same ID, orientation toggled if the node was
+    // flipped.
+    auto to_copy_handle = [&](const handle_t& original) {
+        const nid_t id = source.get_id(original);
+        const bool was_reverse = source.get_is_reverse(original);
+        const bool toggled = flipped_ids.count(id) != 0;
+        return flipped_copy.get_handle(id, was_reverse != toggled);
+    };
+
+    // Pass 2: recreate every edge between the translated handles.
+    source.for_each_edge([&](const edge_t& edge) {
+        const handle_t left = to_copy_handle(edge.first);
+        const handle_t right = to_copy_handle(edge.second);
+        flipped_copy.create_edge(left, right);
+
+        // Returning true asks for the iteration to continue.
+        return true;
+    });
+
+    return flipped_copy;
+}
+
+} // namespace unittest
+} // namespace vg
+
+#endif
diff --git a/src/unittest/support/snarl_decomposition_fuzzer.cpp b/src/unittest/support/snarl_decomposition_fuzzer.cpp
new file mode 100644
index 0000000000..263ad486cf
--- /dev/null
+++ b/src/unittest/support/snarl_decomposition_fuzzer.cpp
@@ -0,0 +1,187 @@
+#include "snarl_decomposition_fuzzer.hpp"
+
+#include <cassert>
+#include <stack>
+
+namespace vg {
+namespace unittest {
+
+using ET = DecompositionEventType;
+
+SnarlDecompositionFuzzer::SnarlDecompositionFuzzer(
+    const HandleGraph* graph,
+    const HandleGraphSnarlFinder* finder,
+    const std::unordered_set<nid_t>& chains_to_flip)
+    : HandleGraphSnarlFinder(graph), wrapped(finder)
+{
+    // The predicate holds its own copy of the ID set, so the caller's set need not outlive us.
+    should_flip = [chains_to_flip](nid_t node_id) -> bool {
+        return chains_to_flip.count(node_id) != 0;
+    };
+}
+
+void SnarlDecompositionFuzzer::traverse_decomposition(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl) const
+{
+    // Step 1: Capture all events from the wrapped finder.
+    std::vector<DecompositionHandleEvent> events = capture_events(*wrapped);
+
+    if (events.empty()) {
+        return;
+    }
+
+    // Step 2: Build pairing vector mapping each begin to its matching end
+    // and vice versa, using separate stacks for chains and snarls.
+    std::vector<size_t> other_bound(events.size());
+    {
+        std::stack<size_t> chain_stack, snarl_stack;
+        for (size_t i = 0; i < events.size(); i++) {
+            switch (events[i].type) {
+            case ET::BEGIN_CHAIN:
+                chain_stack.push(i);
+                break;
+            case ET::END_CHAIN:
+                assert(!chain_stack.empty());
+                other_bound[i] = chain_stack.top();
+                other_bound[chain_stack.top()] = i;
+                chain_stack.pop();
+                break;
+            case ET::BEGIN_SNARL:
+                snarl_stack.push(i);
+                break;
+            case ET::END_SNARL:
+                assert(!snarl_stack.empty());
+                other_bound[i] = snarl_stack.top();
+                other_bound[snarl_stack.top()] = i;
+                snarl_stack.pop();
+                break;
+            }
+        }
+        // Every begin must have been closed, or its other_bound entry is meaningless.
+        assert(chain_stack.empty() && snarl_stack.empty());
+    }
+
+    // Step 3: Walk through events with a cursor, flipping chains as needed.
+    // When we flip a chain, we jump to the other end and reverse direction,
+    // pushing the entry point onto a stack. When the cursor reaches a stack
+    // entry point, we jump back to the far end and restore direction.
+    struct FlipEntry {
+        size_t entry_index;
+        bool original_reverse;
+    };
+    std::stack<FlipEntry> flip_stack;
+
+    auto emitter = event_emitter(begin_chain, end_chain, begin_snarl, end_snarl);
+
+    bool reverse = false;
+    for (size_t cursor = 0; cursor != events.size(); cursor += reverse ? -1 : 1) {
+        // We know if we're entering a chain, we can't be at a stack pop point.
+        // So we can handle those cases separately.
+
+        if (events[cursor].type == (reverse ? ET::END_CHAIN : ET::BEGIN_CHAIN) &&
+            should_flip(graph->get_id(events[cursor].handle))) {
+            // We're entering a chain, and this is a chain we want to flip. So
+            // flip before emitting anything.
+
+            // Flip: remember where we entered, jump to the other end,
+            // reverse direction, emit the entry event there.
+            flip_stack.push({cursor, reverse});
+            cursor = other_bound[cursor];
+            reverse = !reverse;
+        }
+
+        // Emit the event here
+        emitter(reverse ? flip(events[cursor], graph) : events[cursor]);
+
+        if (!flip_stack.empty() && cursor == flip_stack.top().entry_index) {
+            // We've returned to the entry point of a flipped chain, so after
+            // emitting, go back to the entry orientation and jump to the other
+            // side, so we can advance out of it.
+            FlipEntry entry = flip_stack.top();
+            flip_stack.pop();
+            cursor = other_bound[entry.entry_index];
+            reverse = entry.original_reverse;
+        }
+    }
+}
+
+// ReplaySnarlFinder implementation
+
+ReplaySnarlFinder::ReplaySnarlFinder(const HandleGraph* graph, const std::vector<DecompositionEvent>& events) : HandleGraphSnarlFinder(graph) {
+    this->events.reserve(events.size());
+    for (const DecompositionEvent& e : events) {
+        // Resolve to a handle now; brace-init since emplace_back on an aggregate needs C++20.
+        this->events.push_back({e.type, graph->get_handle(e.id, e.is_reverse)});
+    }
+}
+
+void ReplaySnarlFinder::traverse_decomposition(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl) const
+{
+    // Hand every scripted event, in stored order, to the callback that
+    // matches its type.
+    const auto dispatch = event_emitter(begin_chain, end_chain, begin_snarl, end_snarl);
+    for (const auto& scripted : events) { dispatch(scripted); }
+}
+
+std::function<void(const DecompositionHandleEvent&)> event_emitter(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl
+) {
+    // The callbacks are captured by reference rather than copied, so the
+    // returned function is only usable while the originals are alive.
+    auto dispatch = [&begin_chain, &end_chain, &begin_snarl, &end_snarl](const DecompositionHandleEvent& event) {
+        const ET kind = event.type;
+        if (kind == ET::BEGIN_CHAIN) {
+            begin_chain(event.handle);
+        } else if (kind == ET::END_CHAIN) {
+            end_chain(event.handle);
+        } else if (kind == ET::BEGIN_SNARL) {
+            begin_snarl(event.handle);
+        } else if (kind == ET::END_SNARL) {
+            end_snarl(event.handle);
+        }
+        // Any other value matches nothing and is silently ignored.
+    };
+    return dispatch;
+}
+
+std::vector<DecompositionEvent> capture_events(const HandleGraphSnarlFinder& finder, const HandleGraph& graph) {
+    // Capture the events as handles, then translate each to an ID and orientation.
+    const std::vector<DecompositionHandleEvent> handle_result = capture_events(finder);
+    std::vector<DecompositionEvent> result;
+    result.reserve(handle_result.size());
+    for (const DecompositionHandleEvent& e : handle_result) {
+        // Brace-init: emplace_back(args...) on an aggregate only compiles as C++20.
+        result.push_back({e.type, graph.get_id(e.handle), graph.get_is_reverse(e.handle)});
+    }
+    return result;
+}
+
+std::vector<DecompositionHandleEvent> capture_events(const HandleGraphSnarlFinder& finder) {
+    std::vector<DecompositionHandleEvent> captured;
+    // Append one event of the given type for the given handle.
+    auto record = [&captured](ET kind, const handle_t& where) {
+        captured.push_back({kind, where});
+    };
+    // Run the traversal with one recording callback per event type, so the
+    // events land in the vector in the order the finder emits them.
+    finder.traverse_decomposition(
+        [&record](handle_t h) { record(ET::BEGIN_CHAIN, h); },
+        [&record](handle_t h) { record(ET::END_CHAIN, h); },
+        [&record](handle_t h) { record(ET::BEGIN_SNARL, h); },
+        [&record](handle_t h) { record(ET::END_SNARL, h); }
+    );
+    return captured;
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/support/snarl_decomposition_fuzzer.hpp b/src/unittest/support/snarl_decomposition_fuzzer.hpp
new file mode 100644
index 0000000000..91d92e97cb
--- /dev/null
+++ b/src/unittest/support/snarl_decomposition_fuzzer.hpp
@@ -0,0 +1,197 @@
+#ifndef VG_UNITTEST_SNARL_DECOMPOSITION_FUZZER_HPP_INCLUDED
+#define VG_UNITTEST_SNARL_DECOMPOSITION_FUZZER_HPP_INCLUDED
+
+/**
+ * \file snarl_decomposition_fuzzer.hpp
+ * Provides SnarlDecompositionFuzzer, which wraps a HandleGraphSnarlFinder and
+ * randomly flips chains in the snarl decomposition, and ReplaySnarlFinder,
+ * which replays a scripted sequence of decomposition events.
+ */
+#include <functional>
+#include <random>
+#include <vector>
+#include <set>
+#include <unordered_set>
+#include <utility>
+#include "snarls.hpp"
+#include "handle.hpp"
+
+namespace vg {
+namespace unittest {
+
+/// Event types for snarl decomposition traversal. Bit 0 is set for END, bit 1 for SNARL.
+enum class DecompositionEventType {
+    BEGIN_CHAIN = 0,
+    END_CHAIN = 1,
+    BEGIN_SNARL = 2,
+    END_SNARL = 3
+};
+
+inline std::ostream& operator<<(std::ostream& out, const DecompositionEventType& t) {
+    const int code = static_cast<int>(t);
+    return out << ((code & 1) != 0 ? "END" : "BEGIN") << "_" << ((code & 2) != 0 ? "SNARL" : "CHAIN");
+}
+
+/// Flip the polarity of an event type (begin vs. end), keeping chain vs. snarl.
+inline DecompositionEventType flip(const DecompositionEventType& t) {
+    // Begin and end of the same structure differ only in the lowest bit.
+    return static_cast<DecompositionEventType>(static_cast<int>(t) ^ 1);
+}
+
+/// One step of a snarl decomposition traversal, identified by node ID and
+/// orientation rather than by handle, which is easier to write in test code.
+struct DecompositionEvent {
+    DecompositionEventType type;  ///< Which boundary this event marks.
+    nid_t id;                     ///< ID of the node the event is at.
+    bool is_reverse;              ///< True for the reverse orientation of that node.
+
+    inline bool operator==(const DecompositionEvent& other) const {
+        return is_reverse == other.is_reverse && id == other.id && type == other.type;
+    }
+
+    inline bool operator!=(const DecompositionEvent& other) const {
+        return !operator==(other);
+    }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const DecompositionEvent& e) {
+    return out << e.type << "(" << e.id << (e.is_reverse ? "-" : "+") << ")";
+}
+
+/// One step of a snarl decomposition traversal, identified by handle, which
+/// is the more convenient form for the traversal code itself.
+struct DecompositionHandleEvent {
+    DecompositionEventType type;  ///< Which boundary this event marks.
+    handle_t handle;              ///< The oriented node the event is at.
+};
+
+/// Mirror a whole event: begin becomes end (or vice versa) and the handle is flipped via g.
+inline DecompositionHandleEvent flip(const DecompositionHandleEvent& e, const HandleGraph* g) {
+    return DecompositionHandleEvent{flip(e.type), g->flip(e.handle)};
+}
+
+/// Turn begin and end functions to call into a function that emits an event by
+/// type. The provided functions must outlive the returned function.
+std::function<void(const DecompositionHandleEvent&)> event_emitter(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl
+);
+
+/// Capture all events emitted by a snarl finder, in terms of IDs and orientations.
+std::vector<DecompositionEvent> capture_events(const HandleGraphSnarlFinder& finder, const HandleGraph& graph);
+
+/// Capture all events emitted by a snarl finder, in terms of handles.
+std::vector<DecompositionHandleEvent> capture_events(const HandleGraphSnarlFinder& finder);
+
+/**
+ * A HandleGraphSnarlFinder that wraps another HandleGraphSnarlFinder and
+ * randomly flips chains in the snarl decomposition. Flipping a chain reverses
+ * the entire chain including all children; if a child chain is also selected
+ * for flipping, it gets flipped again (canceling the parent's flip for that
+ * child).
+ *
+ * For non-randomized testing, the specific chains to flip can be
+ * pre-identified and provided on construction.
+ */
+class SnarlDecompositionFuzzer : public HandleGraphSnarlFinder {
+public:
+    /**
+     * Construct a fuzzer wrapping the given finder, flipping each chain with
+     * probability p_flip. The graph pointer is needed to flip handles. The
+     * generator is captured by reference, so it must outlive this object.
+     */
+    template<typename URNG>
+    SnarlDecompositionFuzzer(const HandleGraph* graph,
+                             const HandleGraphSnarlFinder* finder,
+                             double p_flip, URNG& generator);
+
+    /**
+     * Construct a fuzzer wrapping the given finder, flipping the chains
+     * bounded by the given node IDs.
+     *
+     * You should provide both bounding IDs for each chain, but only the one
+     * that the chain is actually arrived at through during the traversal will
+     * really get used.
+     *
+     * Note that a node can bound at most one chain.
+     *
+     * This is mostly for testing the fuzzer itself. The ID set is copied.
+     */
+    SnarlDecompositionFuzzer(const HandleGraph* graph,
+                             const HandleGraphSnarlFinder* finder,
+                             const std::unordered_set<nid_t>& chains_to_flip);
+
+    virtual ~SnarlDecompositionFuzzer() = default;
+
+    /**
+     * Replay the wrapped finder's decomposition with the selected chains flipped.
+     */
+    virtual void traverse_decomposition(
+        const std::function<void(handle_t)>& begin_chain,
+        const std::function<void(handle_t)>& end_chain,
+        const std::function<void(handle_t)>& begin_snarl,
+        const std::function<void(handle_t)>& end_snarl
+    ) const override;
+
+private:
+    /// The wrapped snarl finder (not owned; must outlive this object)
+    const HandleGraphSnarlFinder* wrapped;
+
+    /// Function that decides whether to flip a chain, given the ID of the
+    /// bounding node it is entered through. May produce different results
+    /// when called multiple times with the same input.
+    std::function<bool(nid_t)> should_flip;
+};
+
+/**
+ * A HandleGraphSnarlFinder that replays a scripted sequence of decomposition
+ * events, so SnarlDecompositionFuzzer can be tested without a real snarl
+ * finder. A graph is still needed to turn the scripted IDs into handles.
+ */
+class ReplaySnarlFinder : public HandleGraphSnarlFinder {
+public:
+    /**
+     * Construct a replay finder that will emit the given events, as handles on graph.
+     */
+    ReplaySnarlFinder(const HandleGraph* graph, const std::vector<DecompositionEvent>& events);
+
+    virtual ~ReplaySnarlFinder() = default;
+
+    /**
+     * Replay the scripted events, in the order they were given.
+     */
+    virtual void traverse_decomposition(
+        const std::function<void(handle_t)>& begin_chain,
+        const std::function<void(handle_t)>& end_chain,
+        const std::function<void(handle_t)>& begin_snarl,
+        const std::function<void(handle_t)>& end_snarl
+    ) const override;
+
+private:
+
+    using EventType = DecompositionEventType;
+    using Event = DecompositionHandleEvent;
+    
+    /// The events to replay, already converted to handles by the constructor.
+    std::vector<Event> events;
+};
+
+
+template<typename URNG>
+SnarlDecompositionFuzzer::SnarlDecompositionFuzzer(
+    const HandleGraph* graph,
+    const HandleGraphSnarlFinder* finder,
+    double p_flip, URNG& generator)
+    : HandleGraphSnarlFinder(graph), wrapped(finder)
+{
+    should_flip = [&generator, p_flip](nid_t ignored) -> bool {
+        return std::uniform_real_distribution<double>(0.0, 1.0)(generator) < p_flip;
+    };
+}
+
+} // namespace unittest
+} // namespace vg
+
+#endif
diff --git a/src/unittest/variant_adder.cpp b/src/unittest/variant_adder.cpp
index afe3353e4b..6fad7d82ab 100644
--- a/src/unittest/variant_adder.cpp
+++ b/src/unittest/variant_adder.cpp
@@ -9,7 +9,7 @@
 
 #include "../utility.hpp"
 #include "../path.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 
 #include <vector>
 #include <sstream>
@@ -38,7 +38,7 @@ ref	5	rs1337	A	G	29	PASS	.	GT
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -51,14 +51,10 @@ ref	5	rs1337	A	G	29	PASS	.	GT
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     
     // Make a VariantAdder
@@ -85,7 +81,7 @@ ref	5	rs1337	A	G	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -98,14 +94,10 @@ ref	5	rs1337	A	G	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -139,7 +131,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -152,14 +144,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -193,7 +181,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -213,14 +201,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     SECTION ("should work when the graph is as given") {
     
@@ -280,7 +264,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	29
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -293,14 +277,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	29
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -323,14 +303,10 @@ TEST_CASE( "The smart aligner works on very large inserts", "[variantadder]" ) {
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GCGCAAAAAAAAAAAAAAAAAAAAAGCGC"}]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -396,21 +372,17 @@ TEST_CASE( "The smart aligner should use mapping offsets on huge deletions", "[v
             {"from": 2, "to": 3}
         ]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 10000; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<10kAs>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -484,21 +456,17 @@ TEST_CASE( "The smart aligner should find existing huge deletions", "[variantadd
             {"from": 2, "to": 3}
         ]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 10000; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<10kAs>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -564,21 +532,17 @@ TEST_CASE( "The smart aligner should use deletion edits on medium deletions", "[
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GCGC<100As>GCGC"}]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 100; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<100As>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
diff --git a/src/unittest/vg.cpp b/src/unittest/vg.cpp
index 9beb3e1ca7..b2795b57cc 100644
--- a/src/unittest/vg.cpp
+++ b/src/unittest/vg.cpp
@@ -8,6 +8,7 @@
 #include "../utility.hpp"
 #include "../algorithms/normalize.hpp"
 #include "../algorithms/disjoint_components.hpp"
+#include "../io/json2graph.hpp"
 #include "handle.hpp"
 
 namespace vg {
@@ -15,16 +16,6 @@ namespace unittest {
 
 using namespace std;
 
-// Turn a JSON string into a VG graph
-VG string_to_graph(const string& json) {
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, json.c_str(), json.size());
-    graph.merge(chunk);
-    
-    return graph;
-}
-
 TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
    
     unordered_map<nid_t, pair<nid_t, bool> > node_translation;
@@ -44,7 +35,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -69,7 +60,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -93,7 +84,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -123,7 +114,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -252,7 +243,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -327,7 +318,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -417,7 +408,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -574,7 +565,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -742,7 +733,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(2, node_translation);
@@ -904,7 +895,7 @@ TEST_CASE("expand_context_by_length() should respect barriers", "[vg][context]")
     }
     )";
     
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
 
     SECTION("barriers on either end of the seed node should stop anything being extracted") {
 
@@ -962,7 +953,7 @@ TEST_CASE("add_nodes_and_edges() should connect all nodes", "[vg][edit]") {
     )";
     
     // Define a graph
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
     
     const string path_json = R"(
     {
@@ -1051,7 +1042,7 @@ TEST_CASE("edit() should not get confused even under very confusing circumstance
     )";
     
     // Define a graph
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
     
     // And a path that doubles back on itself through an edge that isn't in the graph yet
     const string path_json = R"(
@@ -1310,7 +1301,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // One of the two alternative Ts should have been eliminated
@@ -1341,7 +1332,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts should be eliminated
@@ -1375,7 +1366,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts and Gs should be eliminated
@@ -1409,7 +1400,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts and Gs should be eliminated
@@ -1447,7 +1438,7 @@ TEST_CASE("normalize() can join nodes and merge siblings when nodes are backward
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts (actually As) should be eliminated
@@ -1486,7 +1477,7 @@ TEST_CASE("normalize() can join nodes and merge siblings when nodes are backward
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts (actually As) and Gs (actually Cs) should be eliminated
diff --git a/src/unittest/vg_algorithms.cpp b/src/unittest/vg_algorithms.cpp
index b4fc736734..8e713f87f7 100644
--- a/src/unittest/vg_algorithms.cpp
+++ b/src/unittest/vg_algorithms.cpp
@@ -27,7 +27,7 @@
 #include "../vg.hpp"
 #include "../xg.hpp"
 #include <bdsg/hash_graph.hpp>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 
 
 using namespace google::protobuf;
@@ -1092,11 +1092,8 @@ TEST_CASE( "Connecting graph extraction works on a cool loop without leaving ext
     {"edge": [{"from": "185927720", "to": "185927722"}, {"from": "185927721", "from_start": true, "to": "185927722"}, {"from": "185927722", "to": "186681786", "to_end": true}, {"from": "185927722", "to": "185927723"}, {"from": "186681786", "to": "186683083"}, {"from": "186681786", "from_start": true, "to": "186681787", "to_end": true}, {"from": "186681787", "to": "186683069", "to_end": true}, {"from": "186681787", "from_start": true, "to": "186681789"}, {"from": "186681787", "from_start": true, "to": "186681788", "to_end": true}, {"from": "186681788", "from_start": true, "to": "186681790", "to_end": true}, {"from": "186681789", "to": "186681790", "to_end": true}, {"from": "186681790", "from_start": true, "to": "186681792", "to_end": true}, {"from": "186683069", "from_start": true, "to": "186683079", "to_end": true}, {"from": "186683079", "from_start": true, "to": "186683080", "to_end": true}, {"from": "186683080", "from_start": true, "to": "186683081", "to_end": true}, {"from": "186683081", "from_start": true, "to": "186683083", "to_end": true}], "node": [{"id": "185927720", "sequence": "G"}, {"id": "185927721", "sequence": "A"}, {"id": "185927722", "sequence": "ACCGGG"}, {"id": "185927723", "sequence": "AGTGGGGG"}, {"id": "186681786", "sequence": "C"}, {"id": "186681787", "sequence": "TGGGAGTCTAAGTCTCTTTTGATCACACTTTAAAGACCAAAAGGTAGAAGCGCAAAGACGTTATCTGTCCAATATTACAAACCTAGTAAGTGGTGGAATTTGGCCTTGAACCCAGATCTGTAACTCCAGAGCCGAAGTGCTTCACCCACCTCCCTGTGGTG"}, {"id": "186681788", "sequence": "G"}, {"id": "186681789", "sequence": "T"}, {"id": "186681790", "sequence": "TAT"}, {"id": "186681792", "sequence": "T"}, {"id": "186683069", "sequence": "G"}, {"id": "186683079", "sequence": "G"}, {"id": "186683080", "sequence": 
"TACCCCGGAATCCCTGCCGCGGCCCCTCGGGCCTGTCCACATCCCTCTGCCCCTCCCAGACCTCTGTCCTTCCACCAATCGCCTCCCGCAGCCCCGAGCCGCCACTCCCAGTCCCCCGAGTCCCTGCCGCGCGCCCTCGCGCCTGTCCACATCCCTCTGCCCATCCGAGACCTCTGTCCTTACACCACTAGCCACCCCACGTGGGACTTCCATGGCTTCTGAGTACAAGGCCAGCCCCCCGGCCCACCAGCTTTCGGAATGCCTGCTTACCTCTTTTTCTGTAGA"}, {"id": "186683081", "sequence": "CCGG"}, {"id": "186683083", "sequence": "C"}]}
     )";
             
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG vg;
-    vg.extend(source);
+    vg::io::json2graph(graph_json, &vg);
             
     bdsg::HashGraph extractor;
             
@@ -1688,11 +1685,8 @@ TEST_CASE( "Connecting graph extraction works on a particular case without leavi
         
             )";
             
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG vg;
-    vg.extend(source);
+    vg::io::json2graph(graph_json, &vg);
             
     VG extractor;
             
@@ -2583,13 +2577,9 @@ TEST_CASE( "Topological sort works on a more complex graph",
             {"node": [{"id": 1, "sequence": "GTATTTTTAGTA"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GAGACGGGGTTTCACCATGTT"}, {"id": 4, "sequence": "T"}, {"id": 5, "sequence": "CTAATTTTT"}, {"id": 6, "sequence": "CA"}, {"id": 7, "sequence": "GG"}, {"id": 8, "sequence": "ACGCCC"}, {"id": 9, "sequence": "C"}, {"id": 10, "sequence": "T"}, {"id": 11, "sequence": "C"}, {"id": 12, "sequence": "GCCA"}, {"id": 13, "sequence": "A"}, {"id": 14, "sequence": "GGGATTACAGGCGCACACC"}, {"id": 15, "sequence": "CCACACC"}, {"id": 16, "sequence": "AT"}, {"id": 17, "sequence": "CC"}, {"id": 18, "sequence": "GGTCAGGCTGGTCTCGACTCC"}, {"id": 19, "sequence": "TGACCTCCTGATCTGCCCCCC"}, {"id": 20, "sequence": "A"}, {"id": 21, "sequence": "G"}, {"id": 22, "sequence": "TATTTTTAGTA"}, {"id": 23, "sequence": "A"}, {"id": 24, "sequence": "G"}, {"id": 25, "sequence": "GA"}], "edge": [{"from": 4, "to": 1}, {"from": 5, "to": 1}, {"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 22, "to": 2}, {"from": 2, "to": 20}, {"from": 2, "to": 21}, {"from": 3, "to": 18}, {"from": 5, "to": 4}, {"from": 6, "to": 5}, {"from": 7, "to": 5}, {"from": 8, "to": 6}, {"from": 8, "to": 7}, {"from": 9, "to": 8}, {"from": 10, "to": 8}, {"from": 11, "to": 9}, {"from": 11, "to": 10}, {"from": 12, "to": 11}, {"from": 13, "to": 11}, {"from": 16, "to": 12}, {"from": 17, "to": 12}, {"from": 12, "to": 15}, {"from": 14, "to": 13}, {"from": 18, "to": 19}, {"from": 20, "to": 25}, {"from": 21, "to": 25}, {"from": 23, "to": 22}, {"from": 24, "to": 22}]}
             )";
             
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
-    // Make it into a VG
+    // Load the JSON into a VG
     VG vg;
-    vg.extend(proto_graph);
+    vg::io::json2graph(graph_json, &vg);
             
     SECTION( "handlealgs::topological_order produces a consistent total ordering and orientation" ) {
         auto handle_sort = handlealgs::topological_order(&vg);
@@ -5385,11 +5375,8 @@ TEST_CASE("simplify_siblings() works on a graph with a reversing self loop", "[a
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "ACA"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}, {"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 3, "to_length": 3}], "position": {"node_id": "3"}, "rank": "2"}], "name": "y"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
@@ -5405,11 +5392,8 @@ TEST_CASE("simplify_siblings() works on a smaller graph with a reversing self lo
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "A"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
@@ -5425,11 +5409,8 @@ TEST_CASE("normalize() works on a graph with a reversing self loop", "[algorithm
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "ACA"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}, {"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 3, "to_length": 3}], "position": {"node_id": "3"}, "rank": "2"}], "name": "y"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
diff --git a/src/unittest/vpkg.cpp b/src/unittest/vpkg.cpp
index 51a849c446..977814ff9c 100644
--- a/src/unittest/vpkg.cpp
+++ b/src/unittest/vpkg.cpp
@@ -13,7 +13,7 @@
 #include "xg.hpp"
 #include "../vg.hpp"
 #include "../snarl_seed_clusterer.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <gcsa/gcsa.h>
 #include <sstream>
 #include <tuple>
@@ -50,12 +50,12 @@ TEST_CASE("We can read and write XG", "[vpkg][handlegraph][xg]") {
     )";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph hash_graph;
+    vg::io::json2graph(graph_json, &hash_graph);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(hash_graph);
 
     stringstream ss;
     
@@ -148,13 +148,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a VG", "[vpkg][handlegra
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -179,13 +176,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a HandleGraph which is a
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -210,13 +204,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a HandleGraph which is a
 
 TEST_CASE("We can read an empty VG as a HandleGraph", "[vpkg][handlegraph][vg][empty]") {
     string graph_json = "{}";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -240,13 +231,10 @@ TEST_CASE("We prefer to read a graph as the first provided type that matches", "
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
diff --git a/src/unittest/xdrop_aligner.cpp b/src/unittest/xdrop_aligner.cpp
index f745b8f66a..07577e4479 100644
--- a/src/unittest/xdrop_aligner.cpp
+++ b/src/unittest/xdrop_aligner.cpp
@@ -5,7 +5,7 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../alignment.hpp"
 #include "../vg.hpp"
 #include <vg/vg.pb.h>
@@ -764,12 +764,9 @@ TEST_CASE("QualAdjXdropAligner will not penalize a low quality mismatch", "[xdro
 TEST_CASE("XdropAligner doesn't crash on a case where it is hard to find a seed", "[xdrop][alignment][mapping]") {
     
     string graph_json = R"({"edge": [{"from": "92345167", "to": "92345168"}, {"from": "92345182", "to": "92345183"}, {"from": "92345165", "to": "92345166"}, {"from": "92345177", "to": "92345178"}, {"from": "92345171", "to": "92345172"}, {"from": "92345161", "to": "92345162"}, {"from": "92345183", "to": "92345184"}, {"from": "92345181", "to": "92345182"}, {"from": "92345178", "to": "92345179"}, {"from": "92345166", "to": "92345167"}, {"from": "92345179", "to": "92345180"}, {"from": "92345173", "to": "92345174"}, {"from": "92345184", "to": "92345185"}, {"from": "92345169", "to": "92345170"}, {"from": "92345185", "to": "92345186"}, {"from": "92345160", "to": "92345161"}, {"from": "92345174", "to": "92345175"}, {"from": "92345162", "to": "92345163"}, {"from": "92345175", "to": "92345176"}, {"from": "92345168", "to": "92345169"}, {"from": "92345163", "to": "92345164"}, {"from": "92345172", "to": "92345173"}, {"from": "92345180", "to": "92345181"}, {"from": "92345176", "to": "92345177"}, {"from": "92345170", "to": "92345171"}, {"from": "92345164", "to": "92345165"}], "node": [{"id": "92345167", "sequence": "TTTATATATATATATTTATATATATATATTTA"}, {"id": "92345182", "sequence": "TATATATATTTATATATATATTTATATATATA"}, {"id": "92345165", "sequence": "ATATATATATATTTATATATATTTATATATTA"}, {"id": "92345177", "sequence": "TTTATATATATATTTATATATATATATTATAT"}, {"id": "92345171", "sequence": "TTATATATATATTTATATATATATTTATATAT"}, {"id": "92345161", "sequence": "ATATATTTATATATTTTTATATATTATATATT"}, {"id": "92345183", "sequence": "TTTATATATATTTATATATATATTTATATATA"}, {"id": "92345181", "sequence": "ATATATTATATATATATTTATATATATATTTA"}, {"id": "92345178", "sequence": "ATATATTTATATATATATTTATATATATATTT"}, {"id": "92345166", "sequence": "TTTATATATATTTATATATATATTTATATATA"}, {"id": "92345179", "sequence": "ATATATATATTTATATATATATTTATATATAT"}, {"id": "92345173", "sequence": "ATATTTATATATATATATTTATATATATATTT"}, {"id": "92345184", "sequence": "TATTTATATATATATTTATATATATTTATATA"}, {"id": "92345169", 
"sequence": "TTTATATATATATTTATATATATATTTATATA"}, {"id": "92345185", "sequence": "TATATTTATATATATATATATATATTTATATA"}, {"id": "92345160", "sequence": "ATTTATATATATATTTATATATATATTTATAT"}, {"id": "92345174", "sequence": "ATATATATATTTATATATATATTATTTATATA"}, {"id": "92345162", "sequence": "TATATATATATTTATATATTATATATATATTT"}, {"id": "92345175", "sequence": "TATATTTATATATATATTATATATATATTTAT"}, {"id": "92345168", "sequence": "TATATATATTTATATATATATTTATATATATA"}, {"id": "92345163", "sequence": "ATATATTTATATATATATTTATATATATTTAT"}, {"id": "92345172", "sequence": "ATATATATATATTTATATATATATTTATATAT"}, {"id": "92345180", "sequence": "ATTTATATATATATTTATATATATATTTATAT"}, {"id": "92345176", "sequence": "ATATATATATTATATATATATTTATATATATA"}, {"id": "92345170", "sequence": "TATATTTATATATATATATTATATATATATAT"}, {"id": "92345164", "sequence": "ATATATATTTATATATATTTATATATATATTT"}, {"id": "92345186", "sequence": "TATATTTATATATATTTATATATATATTTATA"}]})";
-    
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-    
-    VG graph;
-    graph.extend(source);
+
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     Alignment aln;
     aln.set_sequence("CAGCACTTTGGGAGGCCAAGGTGGGTGGATCATCTGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAATCCTGTCTCTACTGAAAATACTAAAATTAGCCAGGCGTGGCGGCCAGTGCCTGTAATCCCGGCTACTGGGGAGG");
diff --git a/src/unittest/xg.cpp b/src/unittest/xg.cpp
index d74db5d0b0..dfa913b8eb 100644
--- a/src/unittest/xg.cpp
+++ b/src/unittest/xg.cpp
@@ -8,7 +8,9 @@
 #include "vg.hpp"
 #include "xg.hpp"
 #include "graph.hpp"
+#include "../io/json2graph.hpp"
 #include "algorithms/subgraph.hpp"
+#include "bdsg/hash_graph.hpp"
 #include <stdio.h>
 
 namespace vg {
@@ -22,19 +24,18 @@ TEST_CASE("We can build an xg index on a nice graph", "[xg]") {
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(1), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -49,19 +50,18 @@ TEST_CASE("We can build an xg index on a nasty graph", "[xg]") {
     {"id":9999,"sequence":"AAA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(1), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -161,15 +161,14 @@ TEST_CASE("We can build an xg index on a very nasty graph", "[xg]") {
     {"position":{"node_id":1444},"rank":1059},
     {"position":{"node_id":1445},"rank":1060}]}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    VG source;
+    vg::io::json2graph(graph_json, &source);
 
-    sort_by_id_dedup_and_clean(proto_graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     SECTION("Context extraction gets something") {
         VG graph;
@@ -182,7 +181,7 @@ TEST_CASE("We can build an xg index on a very nasty graph", "[xg]") {
         
         SECTION("We can extract within a single node") {
             algorithms::extract_path_range(xg_index, xg_index.get_path_handle("17"), 5, 15, graph);
-            
+
             // We should just get node 1416
             REQUIRE(graph.graph.node_size() == 1);
             REQUIRE(graph.graph.node(0).id() == 1416);
@@ -265,14 +264,14 @@ TEST_CASE("We can build and scan an XG index for a problematic graph", "[xg]") {
       ]}
     ]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
 
     // Build the xg index (without any sorting)
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     REQUIRE(xg_index.get_node_count() == 5);
     
@@ -300,18 +299,16 @@ TEST_CASE("We can build the xg index on a small graph with discontinuous node id
     )";
 
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    VG source;
+    vg::io::json2graph(graph_json, &source);
 
-    sort_by_id_dedup_and_clean(proto_graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(10), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -326,14 +323,14 @@ TEST_CASE("Looping over XG handles in parallel works", "[xg]") {
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     size_t count = 0;
 
@@ -341,7 +338,7 @@ TEST_CASE("Looping over XG handles in parallel works", "[xg]") {
         #pragma omp critical
         count++;
     }, true);
-    
+
     REQUIRE(count == 2);
 
 }
@@ -400,14 +397,14 @@ TEST_CASE("Vectorization of xg works correctly", "[xg]") {
             {"edit": [{"from_length": 11, "to_length": 11}], "position": {"node_id": "15"}, "rank": "10"}
         ], "name": "x"}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
 
     // Build the xg index (without any sorting)
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     REQUIRE(xg_index.get_node_count() == 15);
     
diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp
index dc3255e984..1d0a2c39c7 100644
--- a/src/unittest/zip_code.cpp
+++ b/src/unittest/zip_code.cpp
@@ -117,6 +117,10 @@ using namespace std;
         bool chain_is_reversed = distance_index.is_reversed_in_parent(
                                                 distance_index.get_node_net_handle(n1->id()));
 
+        // Node 4 is in snarl 3 to 6, which should be regular. The zip codes are going to encode
+        // that regularity, so require it here (the snarl is two get_parent() hops above the node).
+        REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n4->id())))));
+
         SECTION ("zip code for node on top-level chain") {
             ZipCode zipcode;
             zipcode.fill_in_zipcode_from_pos(distance_index, make_pos_t(n1->id(), 0, false));
diff --git a/src/zip_code.cpp b/src/zip_code.cpp
index 4699a24494..051602443f 100644
--- a/src/zip_code.cpp
+++ b/src/zip_code.cpp
@@ -1,3 +1,5 @@
+#include "crash.hpp"
+
 #include "zip_code.hpp"
 
 //#define DEBUG_ZIPCODE
@@ -16,10 +18,11 @@ void ZipCode::fill_in_zipcode_from_pos(const SnarlDistanceIndex& distance_index,
     //Put all ancestors of the node in a vector, starting from the node, and not including the root
     while (!distance_index.is_root(current_handle)) {
         ancestors.emplace_back(distance_index.start_end_traversal_of(current_handle));
-        current_handle = distance_index.get_parent(current_handle);
+        net_handle_t parent_handle = distance_index.get_parent(current_handle); // step one level up toward the root
+        crash_unless(parent_handle != current_handle); // a handle that is its own parent would never change, so this loop could not reach the root
+        current_handle = parent_handle;
     }
 
-
     //Now add the root-level snarl or chain
     if (distance_index.is_root_snarl(current_handle)) {
         //First thing is a snarl, so add the snarl's connected component number
@@ -121,7 +124,7 @@ void ZipCode::fill_in_zipcode_from_pos(const SnarlDistanceIndex& distance_index,
                 }
                 return;
             }
-        } else if (distance_index.is_regular_snarl(current_ancestor, false, graph_ptr)) {
+        } else if (distance_index.is_regular_snarl(current_ancestor)) {
             snarl_code_t snarl_code = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); 
             zipcode.add_value(snarl_code.get_raw_code_type());
             zipcode.add_value(snarl_code.get_raw_prefix_sum_or_identifier());
@@ -1065,11 +1068,7 @@ ZipCode::snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl,
     snarl_code.set_code_type(1);
 
     //The number of children
-    size_t child_count = 0;
-    distance_index.for_each_child(snarl, [&] (const net_handle_t& child) {
-        child_count++;
-    });
-    snarl_code.set_child_count(child_count);
+    snarl_code.set_child_count(distance_index.get_snarl_child_count(snarl));
 
     //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node
     net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false));
@@ -1100,11 +1099,7 @@ ZipCode::snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snar
     snarl_code.set_code_type(distance_index.is_dag(snarl) ? 0 : 2);
 
     //The number of children
-    size_t child_count = 0;
-    distance_index.for_each_child(snarl, [&] (const net_handle_t& child) {
-        child_count++;
-    });
-    snarl_code.set_child_count(child_count);
+    snarl_code.set_child_count(distance_index.get_snarl_child_count(snarl));
 
     //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node
     net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false));