Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 27 additions & 41 deletions src/subcommand/circularize_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@ using namespace vg::subcommand;

// Print the usage text for the circularize subcommand to stderr, then
// terminate the process with exit status 1.
//
// argv: the program's argument vector; only argv[0] is read, to show the
//       program name in the usage line.
//
// This function does not return: it always ends in exit(1).
//
// NOTE(review): this span appears to come from a rendered diff, so a few
// option lines show up in two wordings (e.g. the summary line and -p/--path).
// Confirm against the real file which wording is current.
void help_circularize(char** argv) {
cerr << "usage: " << argv[0] << " circularize [options] <graph.vg> > [circularized.vg]" << endl
<< "Makes specific paths or nodes in a graph circular." << endl
<< "Make specific paths or nodes in a graph circular by connecting head/tail." << endl
<< endl
<< "options:" << endl
<< " -p, --path NAME circularize the path by connecting its head/tail node" << endl
<< " -p, --path NAME circularize the path [may repeat]" << endl
<< " -P, --pathfile FILE circularize all paths in the provided file" << endl
<< " -a, --head ID circularize a head and tail node (must provide a tail)" << endl
<< " -z, --tail ID circularize a head and tail node (must provide a head)" << endl
<< " -d, --describe list all the paths in the graph" << endl
<< " -h, --help print this help message to stderr and exit" << endl;
// Usage is only printed on the error/help path, so exit non-zero.
exit(1);
}
Expand All @@ -43,11 +42,11 @@ int main_circularize(int argc, char** argv) {
exit(1);
}

string path = "";
vector<string> paths_to_circularize;
string pathfile = "";
bool describe = false;
vg::id_t head = -1;
vg::id_t tail = -1;
const vg::id_t DEFAULT_ID = std::numeric_limits<nid_t>::max();
vg::id_t head = DEFAULT_ID;
vg::id_t tail = DEFAULT_ID;


int c;
Expand Down Expand Up @@ -80,13 +79,14 @@ int main_circularize(int argc, char** argv) {
tail = parse<int>(optarg);
break;
case 'p':
path = optarg;
paths_to_circularize.emplace_back(optarg);
break;
case 'P':
pathfile = require_exists(logger, optarg);
break;
case 'd':
describe = true;
logger.error() << "vg circularize --describe has been removed."
<< " Use vg paths --list" << std::endl;
break;
case 'h':
case '?':
Expand All @@ -98,27 +98,24 @@ int main_circularize(int argc, char** argv) {
}
}

vector<string> paths_to_circularize;
if (!((head * tail) > 0)) {
help_circularize(argv);
if ((head == DEFAULT_ID) != (tail == DEFAULT_ID)) {
logger.error() << "Both a head and tail node must be provided" << endl;
} else if (tail < head) {
logger.error() << "Tail " << tail << " is smaller than head " << head << endl;
}
if (pathfile != "") {

if (pathfile != "") {
string line;
ifstream pfi;
pfi.open(pathfile);
if (!pfi.good()){
if (!pfi.good()) {
help_circularize(argv);
logger.error() << "There is an error with the input file." << endl;
}
while (getline(pfi, line)){
while (getline(pfi, line)) {
paths_to_circularize.push_back(line);
}
pfi.close();

}
else if (path != "") {
paths_to_circularize.push_back(path);
}

// TODO: if we settle on a uniform serialization method that covers the VG class, the code is ready to be switched
Expand All @@ -127,32 +124,21 @@ int main_circularize(int argc, char** argv) {
graph = new VG(in);
});

// Check if paths are in graph:
for (const string& p : paths_to_circularize) {
if (!graph->has_path(p)) {
logger.error() << "Path not in graph \"" << p << "\"" << endl;
}
if (head != DEFAULT_ID) {
graph->create_edge(graph->get_handle(tail), graph->get_handle(head));
}

if (describe){
graph->for_each_path_handle([&](const path_handle_t& path_handle) {
cout << graph->get_path_name(path_handle) << endl;
});
exit(0);
}
for (const auto& path_name : paths_to_circularize) {
if (!graph->has_path(path_name)) {
logger.error() << "Path not in graph \"" << path_name << "\"" << endl;
}

if (head > 0 && tail > head){
graph->create_edge(graph->get_handle(tail), graph->get_handle(head));
}
else{
for (const auto& path_name : paths_to_circularize) {
path_handle_t path = graph->get_path_handle(path_name);
if (graph->get_step_count(path) > 0) {
graph->create_edge(graph->get_handle_of_step(graph->path_back(path)),
graph->get_handle_of_step(graph->path_begin(path)));
}
graph->set_circularity(path, true);
path_handle_t path = graph->get_path_handle(path_name);
if (graph->get_step_count(path) > 0) {
graph->create_edge(graph->get_handle_of_step(graph->path_back(path)),
graph->get_handle_of_step(graph->path_begin(path)));
}
graph->set_circularity(path, true);
}

graph->serialize_to_ostream(cout);
Expand Down
23 changes: 20 additions & 3 deletions test/t/25_circularize.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 2
plan tests 7

vg construct -r tiny/tiny.fa -v tiny/tiny.vcf.gz | vg circularize -p x - > circular.vg
vg construct -r tiny/tiny.fa -v tiny/tiny.vcf.gz > linear.vg
vg circularize -p x linear.vg > circular.vg

is $(vg view -j circular.vg | jq -c '.path[] | select(.is_circular)' | wc -l) 1 "a path may be circularized"

Expand All @@ -16,5 +17,21 @@ vg convert circular.xg -v > extracted.vg

is $(vg view -j extracted.vg | jq -c '.path[] | select(.is_circular)' | wc -l) 1 "a circular path survives a round trip to/from xg"

rm -f circular.vg circular.xg extracted.vg
vg circularize -p y linear.vg
is $? 1 "Not allowed to circularize a nonexistent path (--path)"

echo "y" > paths.txt
vg circularize -P paths.txt linear.vg
is $? 1 "Not allowed to circularize a nonexistent path (--pathfile)"

vg circularize -a 2 -z 1 linear.vg
is $? 1 "Not allowed to have tail ID smaller than head ID"

vg circularize -a 1 linear.vg
is $? 1 "Not allowed to have only a head ID"

vg circularize -z 2 linear.vg
is $? 1 "Not allowed to have only a tail ID"

rm -f circular.vg circular.xg extracted.vg linear.vg paths.txt

Loading