diff --git a/.Rbuildignore b/.Rbuildignore index 272bf8d..91114bf 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,2 @@ -..Rcheck ^.*\.Rproj$ ^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 158593b..d3b7c26 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,11 @@ # produced vignettes vignettes/*.html vignettes/*.pdf +vignettes/*_cache/* +vignettes/*.R + +.build.timestamp +build/ # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth diff --git a/DESCRIPTION b/DESCRIPTION old mode 100755 new mode 100644 diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 diff --git a/R/data_description.R b/R/data_description.R old mode 100755 new mode 100644 diff --git a/R/script_disease_relevant_tissues.R b/R/script_disease_relevant_tissues.R old mode 100755 new mode 100644 index 47a2e79..7b215ac --- a/R/script_disease_relevant_tissues.R +++ b/R/script_disease_relevant_tissues.R @@ -5,12 +5,16 @@ #'@param tissue_expr_data a data matrix containing gene expression values. #'@return a numeric matrix of gene expression values in the form of Z-scores. Columns are tissues and rows are diseases. #'@export -tissue.expr <- function(tissue_expr_data){ - if(is.null(rownames(tissue_expr_data))|is.null(colnames(tissue_expr_data))){ +tissue.expr <- function(tissue_expr_data) { + if (is.null(rownames(tissue_expr_data)) | + is.null(colnames(tissue_expr_data))) { stop('Both colnames and rownames for tissue_expr_data must be provided!') } - tissue_expr_data <- tissue_expr_data[!rowSums(tissue_expr_data)==0,] - tissue_expr_zscore <- t(apply(tissue_expr_data, 1, function(x) (x-mean(x))/sd(x))) + tissue_expr_data <- + tissue_expr_data[!rowSums(tissue_expr_data) == 0, ] + tissue_expr_zscore <- + t(apply(tissue_expr_data, 1, function(x) + (x - mean(x)) / sd(x))) return(tissue_expr_zscore) } @@ -31,9 +35,12 @@ tissue.expr <- function(tissue_expr_data){ #'@import RCurl #'@import XML #'@import SPARQL -disease.vrnts <- function(x, id_type='efo', min_score=0, curated=F){ - endpoint<-"http://rdf.disgenet.org/sparql/" - partI<-'PREFIX rdf: +disease.vrnts <- function(x, + id_type = 'efo', + min_score = 0, + curated = F) { + endpoint <- "http://rdf.disgenet.org/sparql/" + partI <- 'PREFIX rdf: PREFIX rdfs: PREFIX owl: PREFIX xsd: @@ -57,11 +64,14 @@ disease.vrnts <- function(x, id_type='efo', min_score=0, curated=F){ ?score WHERE {?disease skos:exactMatch ' - if(id_type=='mesh') url<-paste(' .',sep='') - else if(id_type=='omim') url<-paste(' .',sep='') - else if (id_type=='efo') url<-paste(' .',sep='') + if (id_type == 'mesh') + url <- paste(' .', sep = '') + else if (id_type == 'omim') + url <- paste(' .', sep = '') + else if (id_type == 'efo') + url <- paste(' .', sep = '') - partII<-'?gene sio:SIO_000205 ?GeneSymbol . + partII <- '?gene sio:SIO_000205 ?GeneSymbol . ?scoreIRI sio:SIO_000300 ?score . ?variant so:associated_with ?gene . ?vda sio:SIO_000628 ?variant,?disease ; @@ -73,29 +83,46 @@ disease.vrnts <- function(x, id_type='efo', min_score=0, curated=F){ ?scoreIRI rdf:type ncit:C25338 . ?variant a so:0001060 .' - filter<-paste('FILTER(?score >= ',min_score,')',sep='') + filter <- paste('FILTER(?score >= ', min_score, ')', sep = '') - if(curated==T){ - filterII<-'FILTER (!regex(?source,"BEFREE"))' - filter<-paste(filter,filterII,sep='\n ') + if (curated == T) { + filterII <- 'FILTER (!regex(?source,"BEFREE"))' + filter <- paste(filter, filterII, sep = '\n ') } - partIII<-'} ORDER BY ?GeneSymbol' + partIII <- '} ORDER BY ?GeneSymbol' - query<-paste(partI,url,partII,filter,partIII,sep='\n ') + query <- paste(partI, url, partII, filter, partIII, sep = '\n ') - res<-SPARQL::SPARQL(endpoint,query)$results + res <- SPARQL::SPARQL(endpoint, query)$results - if(nrow(res !=0)){ - etz <- sapply(strsplit(res$gene,'/'),function(x)gsub('>','',tail(x,1))) - symb <- sapply(strsplit(res$GeneSymbol,'/'),function(x)gsub('>','',tail(x,1))) + if (nrow(res != 0)) { + etz <- + sapply(strsplit(res$gene, '/'), function(x) + gsub('>', '', tail(x, 1))) + symb <- + sapply(strsplit(res$GeneSymbol, '/'), function(x) + gsub('>', '', tail(x, 1))) scr <- res$score - df <- data.frame(entrez=etz,symbol=symb,score=scr,stringsAsFactors = F) - df <- df[!duplicated(df),] - df <- df[order(df$score,decreasing=T),] + df <- + data.frame( + entrez = etz, + symbol = symb, + score = scr, + stringsAsFactors = F + ) + df <- df[!duplicated(df), ] + df <- df[order(df$score, decreasing = T), ] } - else df <- data.frame(entrez=character(),symbol=character(),score=numeric(),stringsAsFactors = F) + else + df <- + data.frame( + entrez = character(), + symbol = character(), + score = numeric(), + stringsAsFactors = F + ) return(df) } @@ -142,104 +169,170 @@ disease.vrnts <- function(x, id_type='efo', min_score=0, curated=F){ #'@importFrom Rdpack reprompt #'@importFrom igraph V E graph_from_data_frame distances induced.subgraph clusters #'@importFrom scales rescale -dis.rel.tissues <- function(disease_genes, ppi_network, weighted = FALSE, tissue_expr_data, thr = 1, top = 20, rand = 100, verbose=FALSE){ - if(is.null(rownames(tissue_expr_data))|is.null(colnames(tissue_expr_data))){ - stop('Both colnames and rownames for tissue_expr_data must be provided!') - } - if(!is.character(ppi_network[,1])) ppi_network[,1] <- as.character(ppi_network[,1]) - if(!is.character(ppi_network[,2])) ppi_network[,2] <- as.character(ppi_network[,2]) - ppi_network <- ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_node<-unique(unlist(ppi_network[,1:2])) - universe <- intersect(ppi_network_node,rownames(tissue_expr_data)) - if(length(universe)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data!') - else if(length(universe)!=length(ppi_network_node)| - length(universe)!=nrow(tissue_expr_data)){ - if (verbose) print(paste(length(universe),'IDs in common between ppi_network and tissue_expr_data will be considered.', sep=' ')) - tissue_expr_data<-tissue_expr_data[universe,] - } - disease_genes_size<-length(disease_genes) - disease_genes<-intersect(disease_genes,universe) - if(length(disease_genes)==0) stop('No disease-associated ID match with ppi_network and tissue_expr_data!') - else if( disease_genes_size!=length(disease_genes)){ - if(verbose) print(paste(length(disease_genes),'disease-associated IDs match with ppi_network and tissue_expr_data. Computing z-scores for disease-tissue pairs...',sep=' ')) - } - gene_sets <- list() - max_comps <- c() - sd_rand <- c() - mean_rand <- c() - zscores <- c() - tissue_genes <- sapply(colnames(tissue_expr_data), function(i) rownames(tissue_expr_data)[tissue_expr_data[,i]>=thr]) - for(tgs in tissue_genes) { - # intersect the ppi network with tissue-specific genes - df <- ppi_network[ppi_network[,1] %in% tgs & ppi_network[,2] %in% tgs,] - g <- igraph::graph_from_data_frame(df[,1:2], directed = FALSE) - genes <- intersect(disease_genes, igraph::V(g)$name) - if(length(genes) > 0){ - if(top > 0) { - # calculate the radial distance between each gene and all disease-relevant genes - if(weighted) igraph::E(g)$weight <- scales::rescale(as.numeric(df[,3]),c(1,.Machine$double.eps)) - rad_dist <- igraph::distances(g, to=intersect(disease_genes,igraph::V(g)$name), mode = "out", weights = NULL, algorithm = "dijkstra") - rad_dist <- rowMeans(rad_dist) - genes <- unique(c(genes, names(sort(rad_dist))[1:top])) # extended set of disease-relevant genes - } +dis.rel.tissues <- + function(disease_genes, + ppi_network, + weighted = FALSE, + tissue_expr_data, + thr = 1, + top = 20, + rand = 100, + verbose = FALSE) { + if (is.null(rownames(tissue_expr_data)) | + is.null(colnames(tissue_expr_data))) { + stop('Both colnames and rownames for tissue_expr_data must be provided!') } - gene_sets[[length(gene_sets)+1]] <- genes - # calculate the maximal components by considering disease-relevant genes - if(length(genes) > 1){ - gs <- igraph::induced.subgraph(graph=g, vids=genes) - max_comps <- c(max_comps, max(igraph::clusters(gs)$csize)) - # random - rand_max_comps <- c() - for(i in 1:rand) { - gr <- igraph::induced.subgraph(graph=g, vids=sample(igraph::V(g)$name, length(genes), replace = T)) - rand_max_comps <- c(rand_max_comps, max(igraph::clusters(gr)$csize)) - } - #hist(rand_max_comps) - mean_rand <- c(mean_rand, mean(rand_max_comps)) - sd_rand <- c(sd_rand, sd(rand_max_comps)) - zscores <- c(zscores, (max_comps[length(max_comps)] - mean(rand_max_comps)) / sd(rand_max_comps)) + if (!is.character(ppi_network[, 1])) + ppi_network[, 1] <- as.character(ppi_network[, 1]) + if (!is.character(ppi_network[, 2])) + ppi_network[, 2] <- as.character(ppi_network[, 2]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_node <- unique(unlist(ppi_network[, 1:2])) + universe <- intersect(ppi_network_node, rownames(tissue_expr_data)) + if (length(universe) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data!') + else if (length(universe) != length(ppi_network_node) | + length(universe) != nrow(tissue_expr_data)) { + if (verbose) + print( + paste( + length(universe), + 'IDs in common between ppi_network and tissue_expr_data will be considered.', + sep = ' ' + ) + ) + tissue_expr_data <- tissue_expr_data[universe, ] } - else{ - max_comps <- c(max_comps, length(genes)) - mean_rand <- c(mean_rand, length(genes)) - sd_rand <- c(sd_rand, 0) - zscores <- c(zscores, 0) + disease_genes_size <- length(disease_genes) + disease_genes <- intersect(disease_genes, universe) + if (length(disease_genes) == 0) + stop('No disease-associated ID match with ppi_network and tissue_expr_data!') + else if (disease_genes_size != length(disease_genes)) { + if (verbose) + print( + paste( + length(disease_genes), + 'disease-associated IDs match with ppi_network and tissue_expr_data. Computing z-scores for disease-tissue pairs...', + sep = ' ' + ) + ) } + gene_sets <- list() + max_comps <- c() + sd_rand <- c() + mean_rand <- c() + zscores <- c() + tissue_genes <- + sapply(colnames(tissue_expr_data), function(i) + rownames(tissue_expr_data)[tissue_expr_data[, i] >= thr]) + for (tgs in tissue_genes) { + # intersect the ppi network with tissue-specific genes + df <- + ppi_network[ppi_network[, 1] %in% tgs & ppi_network[, 2] %in% tgs, ] + g <- igraph::graph_from_data_frame(df[, 1:2], directed = FALSE) + genes <- intersect(disease_genes, igraph::V(g)$name) + if (length(genes) > 0) { + if (top > 0) { + # calculate the radial distance between each gene and all disease-relevant genes + if (weighted) + igraph::E(g)$weight <- + scales::rescale(as.numeric(df[, 3]), c(1, .Machine$double.eps)) + rad_dist <- + igraph::distances( + g, + to = intersect(disease_genes, igraph::V(g)$name), + mode = "out", + weights = NULL, + algorithm = "dijkstra" + ) + rad_dist <- rowMeans(rad_dist) + genes <- + unique(c(genes, names(sort(rad_dist))[1:top])) # extended set of disease-relevant genes + } + } + gene_sets[[length(gene_sets) + 1]] <- genes + # calculate the maximal components by considering disease-relevant genes + if (length(genes) > 1) { + gs <- igraph::induced.subgraph(graph = g, vids = genes) + max_comps <- c(max_comps, max(igraph::clusters(gs)$csize)) + # random + rand_max_comps <- c() + for (i in 1:rand) { + gr <- + igraph::induced.subgraph(graph = g, + vids = sample(igraph::V(g)$name, length(genes), replace = T)) + rand_max_comps <- + c(rand_max_comps, max(igraph::clusters(gr)$csize)) + } + #hist(rand_max_comps) + mean_rand <- c(mean_rand, mean(rand_max_comps)) + sd_rand <- c(sd_rand, sd(rand_max_comps)) + zscores <- + c(zscores, (max_comps[length(max_comps)] - mean(rand_max_comps)) / sd(rand_max_comps)) + } + else{ + max_comps <- c(max_comps, length(genes)) + mean_rand <- c(mean_rand, length(genes)) + sd_rand <- c(sd_rand, 0) + zscores <- c(zscores, 0) + } + } + final_df <- + data.frame( + maxc = max_comps, + mer = mean_rand, + sdr = sd_rand, + z = zscores, + genes = sapply(gene_sets, length) + ) + rownames(final_df) <- colnames(tissue_expr_data) + return(final_df) } - final_df <- data.frame(maxc = max_comps, mer = mean_rand, sdr = sd_rand, z = zscores, genes = sapply(gene_sets, length)) - rownames(final_df) <- colnames(tissue_expr_data) - return(final_df) -} #'@rdname dis.rel.tissues #'@export #'@importFrom snow makeCluster stopCluster #'@importFrom doParallel registerDoParallel #'@importFrom foreach foreach %dopar% -list.dis.rel.tissues <- function(disease_gene_list, ppi_network, weighted = FALSE, tissue_expr_data, thr = 1, top = 20, rand = 100, parallel = NULL) { - if(!is.list(disease_gene_list))stop('Argument disease_gene_list is not a list!') - if(is.null(names(disease_gene_list)))stop('Names for disease_gene_list must be provided!') - mat_set_tiss_zscore <- NULL - if(!is.null(parallel)) { - cl <- snow::makeCluster(parallel) - doParallel::registerDoParallel(cl) - `%dopar%` <- foreach::`%dopar%` - dis_rlvnt_tiss <- foreach::foreach(i=disease_gene_list, .export = 'get.disease.relevant.tissues') %dopar% - get.disease.relevant.tissues(i, ppi_network, weighted, tissue_expr_data, thr, top, rand) - snow::stopCluster(cl) - } - else { - warning("A parallel computation is highly recommended",immediate. = T) - dis_rlvnt_tiss <- list() - for(d in disease_gene_list) { - dis_rlvnt_tiss[[length(dis_rlvnt_tiss)+1]] <- get.disease.relevant.tissues(d, ppi_network, weighted, tissue_expr_data, thr, top, rand) +list.dis.rel.tissues <- + function(disease_gene_list, + ppi_network, + weighted = FALSE, + tissue_expr_data, + thr = 1, + top = 20, + rand = 100, + parallel = NULL) { + if (!is.list(disease_gene_list)) + stop('Argument disease_gene_list is not a list!') + if (is.null(names(disease_gene_list))) + stop('Names for disease_gene_list must be provided!') + mat_set_tiss_zscore <- NULL + if (!is.null(parallel)) { + cl <- snow::makeCluster(parallel) + doParallel::registerDoParallel(cl) + `%dopar%` <- foreach::`%dopar%` + dis_rlvnt_tiss <- + foreach::foreach(i = disease_gene_list, .export = 'get.disease.relevant.tissues') %dopar% + ThETA::get.disease.relevant.tissues(i, ppi_network, weighted, tissue_expr_data, thr, top, rand) + snow::stopCluster(cl) + } + else { + warning("A parallel computation is highly recommended", immediate. = T) + dis_rlvnt_tiss <- list() + for (d in disease_gene_list) { + dis_rlvnt_tiss[[length(dis_rlvnt_tiss) + 1]] <- + ThETA::get.disease.relevant.tissues(d, ppi_network, weighted, tissue_expr_data, thr, top, rand) + } } + names(dis_rlvnt_tiss) <- names(disease_gene_list) + df <- + do.call(function(...) + mapply(rbind, ..., SIMPLIFY = F), dis_rlvnt_tiss) + for (i in 1:length(df)) + colnames(df[[i]]) <- colnames(tissue_expr_data) + return(df) } - names(dis_rlvnt_tiss) <- names(disease_gene_list) - df <- do.call(function(...)mapply(rbind,...,SIMPLIFY=F),dis_rlvnt_tiss) - for (i in 1:length(df)) colnames(df[[i]]) <- colnames(tissue_expr_data) - return(df) -} #'Interactive visualization of tissue-specific networks. @@ -263,127 +356,197 @@ list.dis.rel.tissues <- function(disease_gene_list, ppi_network, weighted = FALS #'@export #'@importFrom clusterProfiler enrichKEGG #'@importFrom clusterProfiler enrichGO -visualize.graph <- function(tissue_scores, disease_genes, ppi_network, directed_network = FALSE, - tissue_expr_data, top_targets = NULL, db='kegg',verbose = FALSE){ - - if(is.null(top_targets)) stop('Please specifiy a set of targets (ENTREZ ids)!') - if(is.null(rownames(tissue_expr_data))|is.null(colnames(tissue_expr_data))){ - stop('Both colnames and rownames for tissue_expr_data must be provided.')} - for(i in 1:2) ppi_network[,i] <- as.character(ppi_network[,i]) - ppi_network <- ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_size <- nrow(ppi_network) - if(directed_network) idx <- ppi_network[,1]%in%rownames(tissue_expr_data) - else idx <- ppi_network[,1]%in%rownames(tissue_expr_data) & ppi_network[,2]%in%rownames(tissue_expr_data) - ppi_network <- ppi_network[idx,] - if(nrow(ppi_network)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data.') - else if(ppi_network_size!=nrow(ppi_network)){ - if(verbose) print(paste(nrow(ppi_network),'out of',ppi_network_size,'network edges selected.', sep=' ')) - } - if(!directed_network){ - if(verbose) print('Undirect network. Converting to direct network...') - ppi_network_rev <- ppi_network[,c(2:1)] - colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] - ppi_network <- rbind(ppi_network[,1:2],ppi_network_rev) - } - disease_genes_size <- length(disease_genes) - disease_genes <- intersect(disease_genes,ppi_network[,2]) - if(length(disease_genes)==0) stop('No disease-associated ID match with ppi_network and tissue_expr_data!') - else if( disease_genes_size!=length(disease_genes)){ - if(verbose) print(paste(length(disease_genes),'disease-associated IDs are reachable from the network.',sep=' ')) - } - if(!(db %in% c('kegg','BP','MF','CC'))) stop('Unused value for argument db') - idx <- order(tissue_scores$avg_tissue_score,decreasing=T) - tissue_scores <- tissue_scores[idx,] - g <- igraph::graph_from_edgelist(as.matrix(ppi_network[,1:2]), directed=T) - tissue_expr_data <- scales::rescale(tissue_expr_data,c(1,.Machine$double.eps)) - sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] - sh.path <- lapply(sign_tiss,function(i){ - igraph::E(g)$weight <- tissue_expr_data[ppi_network[,1],i] - lapply(top_targets,function(x) - igraph::shortest_paths(g, from=x, to=disease_genes, mode = "out", weights = NULL,output='epath')$epath) - }) - names(sh.path) <- sign_tiss - for(i in names(sh.path)) names(sh.path[[i]]) <- top_targets - all_target_path <-lapply(sh.path,function(x)lapply(x,function(y)do.call(c,y))) - all_tissue_path <- lapply(all_target_path,function(x)do.call(c,x)) - ids <- lapply(all_tissue_path,igraph::as_ids) - shinyApp( - ui = fluidPage( - fluidRow( +visualize.graph <- + function(tissue_scores, + disease_genes, + ppi_network, + directed_network = FALSE, + tissue_expr_data, + top_targets = NULL, + db = 'kegg', + verbose = FALSE) { + library(shiny) + if (is.null(top_targets)) + stop('Please specifiy a set of targets (ENTREZ ids)!') + if (is.null(rownames(tissue_expr_data)) | + is.null(colnames(tissue_expr_data))) { + stop('Both colnames and rownames for tissue_expr_data must be provided.') + } + for (i in 1:2) + ppi_network[, i] <- as.character(ppi_network[, i]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_size <- nrow(ppi_network) + if (directed_network) + idx <- ppi_network[, 1] %in% rownames(tissue_expr_data) + else + idx <- + ppi_network[, 1] %in% rownames(tissue_expr_data) & + ppi_network[, 2] %in% rownames(tissue_expr_data) + ppi_network <- ppi_network[idx, ] + if (nrow(ppi_network) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data.') + else if (ppi_network_size != nrow(ppi_network)) { + if (verbose) + print(paste( + nrow(ppi_network), + 'out of', + ppi_network_size, + 'network edges selected.', + sep = ' ' + )) + } + if (!directed_network) { + if (verbose) + print('Undirect network. Converting to direct network...') + ppi_network_rev <- ppi_network[, c(2:1)] + colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] + ppi_network <- rbind(ppi_network[, 1:2], ppi_network_rev) + } + disease_genes_size <- length(disease_genes) + disease_genes <- intersect(disease_genes, ppi_network[, 2]) + if (length(disease_genes) == 0) + stop('No disease-associated ID match with ppi_network and tissue_expr_data!') + else if (disease_genes_size != length(disease_genes)) { + if (verbose) + print(paste( + length(disease_genes), + 'disease-associated IDs are reachable from the network.', + sep = ' ' + )) + } + if (!(db %in% c('kegg', 'BP', 'MF', 'CC'))) + stop('Unused value for argument db') + idx <- order(tissue_scores$avg_tissue_score, decreasing = T) + tissue_scores <- tissue_scores[idx, ] + g <- + igraph::graph_from_edgelist(as.matrix(ppi_network[, 1:2]), directed = T) + tissue_expr_data <- + scales::rescale(tissue_expr_data, c(1, .Machine$double.eps)) + sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] + sh.path <- lapply(sign_tiss, function(i) { + igraph::E(g)$weight <- tissue_expr_data[ppi_network[, 1], i] + lapply(top_targets, function(x) + igraph::shortest_paths( + g, + from = x, + to = disease_genes, + mode = "out", + weights = NULL, + output = 'epath' + )$epath) + }) + names(sh.path) <- sign_tiss + for (i in names(sh.path)) + names(sh.path[[i]]) <- top_targets + all_target_path <- + lapply(sh.path, function(x) + lapply(x, function(y) + do.call(c, y))) + all_tissue_path <- lapply(all_target_path, function(x) + do.call(c, x)) + ids <- lapply(all_tissue_path, igraph::as_ids) + shinyApp( + ui = fluidPage(fluidRow( column( width = 2, - checkboxGroupInput("tissue","Select tissue/s",sign_tiss,selected = sign_tiss[1]) + checkboxGroupInput("tissue", "Select tissue/s", sign_tiss, selected = sign_tiss[1]) ), - column( - width = 12, - visNetworkOutput("network") - ) + column(width = 12, + visNetworkOutput("network")) ), - fluidRow( - column( - width = 12, - tableOutput("table") - ) - ) - ), - server = function(input, output) { - output$network <- renderVisNetwork({ - validate(need(input$tissue!='','Please select one or more tissues')) - if(length(input$tissue)>1){ - edge <- strsplit(Reduce(intersect,ids[input$tissue]),split='|',fixed=T) - } - else if (length(input$tissue) ==1){ - edge <- strsplit(ids[[input$tissue]],split='|',fixed=T) - } - edge <- as.data.frame(do.call(rbind,edge)) - edge <- edge[!duplicated(edge),] - colnames(edge) <- c('from','to') - edge$width <- scales::rescale(rowMeans(1-tissue_expr_data[edge$from,input$tissue,drop=F]),c(1,5)) - nb <- unique(unlist(edge[,1:2])) - node <- data.frame(id=nb,label=nb,stringsAsFactors = F) - node$value <- rowMeans(tissue_scores[nb,input$tissue,drop=F]) - gp <- rep('bridge gene',nrow(node)) - gp[node$label%in%top_targets] <- 'target gene' - gp[node$label%in%disease_genes] <- 'disease gene' - node$group <- gp - node$shape <- c("circle","star","diamond")[as.numeric(as.factor(gp))] - visNetwork::visNetwork(node, edge, height = "1500px", width = "500%") %>% - visGroups(groupname = "target gene", color = list(background = "skyblue", border = "deepskyblue")) %>% - visGroups(groupname = "disease gene", color = list(background = "lightcoral", border = "red")) %>% - visGroups(groupname = "bridge gene", shape="circle") %>% - visLegend(addNodes=list( - list(label = "disease gene", shape = "star", - color = list(background = "lightcoral", border = "red")), - list(label = "target gene", shape = "diamond", - color = list(background = "skyblue", border = "deepskyblue")), - list(label = "bridge gene", shape = "circle")), - useGroups = FALSE,width=0.1) %>% - visLayout(hierarchical = TRUE) %>% # visLayout(randomSeed = 123) - visEvents(select = "function(nodes) { + fluidRow(column( + width = 12, + tableOutput("table") + ))), + server = function(input, output) { + output$network <- renderVisNetwork({ + validate(need(input$tissue != '', 'Please select one or more tissues')) + if (length(input$tissue) > 1) { + edge <- + strsplit(Reduce(intersect, ids[input$tissue]), + split = '|', + fixed = T) + } + else if (length(input$tissue) == 1) { + edge <- strsplit(ids[[input$tissue]], split = '|', fixed = T) + } + edge <- as.data.frame(do.call(rbind, edge)) + edge <- edge[!duplicated(edge), ] + colnames(edge) <- c('from', 'to') + edge$width <- + scales::rescale(rowMeans(1 - tissue_expr_data[edge$from, input$tissue, drop = + F]), c(1, 5)) + nb <- unique(unlist(edge[, 1:2])) + node <- data.frame(id = nb, + label = nb, + stringsAsFactors = F) + node$value <- + rowMeans(tissue_scores[nb, input$tissue, drop = F]) + gp <- rep('bridge gene', nrow(node)) + gp[node$label %in% top_targets] <- 'target gene' + gp[node$label %in% disease_genes] <- 'disease gene' + node$group <- gp + node$shape <- + c("circle", "star", "diamond")[as.numeric(as.factor(gp))] + visNetwork::visNetwork(node, edge, height = "1500px", width = "500%") %>% + visNetwork::visGroups( + groupname = "target gene", + color = list(background = "skyblue", border = "deepskyblue") + ) %>% + visNetwork::visGroups( + groupname = "disease gene", + color = list(background = "lightcoral", border = "red") + ) %>% + visNetwork::visGroups(groupname = "bridge gene", shape = "circle") %>% + visNetwork::visLegend( + addNodes = list( + list( + label = "disease gene", + shape = "star", + color = list(background = "lightcoral", border = "red") + ), + list( + label = "target gene", + shape = "diamond", + color = list(background = "skyblue", border = "deepskyblue") + ), + list(label = "bridge gene", shape = "circle") + ), + useGroups = FALSE, + width = 0.1 + ) %>% + visNetwork::visLayout(hierarchical = TRUE) %>% # visLayout(randomSeed = 123) + visNetwork::visEvents(select = "function(nodes) { Shiny.onInputChange('current_node_id', nodes.nodes); ;}") - }) - output$table <- renderTable({ - validate(need(input$tissue!='',NULL), - need(input$current_node_id!=''&input$current_node_id%in%top_targets,'Please select a target gene')) - current_target_path <- sapply(all_target_path[input$tissue],function(x) x[input$current_node_id]) - current_target_path <- Reduce(igraph::intersection,current_target_path) - target_interactors <- unique(c(igraph::ends(g,current_target_path))) - if(db=='kegg') res <- clusterProfiler::enrichKEGG(target_interactors,organism = 'hsa')@result - else res <- clusterProfiler::enrichGO(target_interactors, 'org.Hs.eg.db', ont = db)@result - res <- res[res$p.adjust<0.05,] - if (nrow(res) > 25) res <- res[1:25,] - res - }) - } - ) + }) + output$table <- renderTable({ + validate( + need(input$tissue != '', NULL), + need( + input$current_node_id != '' & + input$current_node_id %in% top_targets, + 'Please select a target gene' + ) + ) + current_target_path <- + sapply(all_target_path[input$tissue], function(x) + x[input$current_node_id]) + current_target_path <- + Reduce(igraph::intersection, current_target_path) + target_interactors <- + unique(c(igraph::ends(g, current_target_path))) + if (db == 'kegg') + res <- + clusterProfiler::enrichKEGG(target_interactors, organism = 'hsa')@result + else + res <- + clusterProfiler::enrichGO(target_interactors, 'org.Hs.eg.db', ont = db)@result + res <- res[res$p.adjust < 0.05, ] + if (nrow(res) > 25) + res <- res[1:25, ] + res + }) + } + ) } - - - - - - - - - diff --git a/R/script_integration_efficacy_scores.R b/R/script_integration_efficacy_scores.R old mode 100755 new mode 100644 index f80ca52..79686ad --- a/R/script_integration_efficacy_scores.R +++ b/R/script_integration_efficacy_scores.R @@ -9,16 +9,18 @@ #'using max and harmonic sum functions (see \insertRef{Failli2019}{ThETA} for details). #'@export #'@importFrom Rdpack reprompt -integrate.scores <- function(data, col.scores = NULL){ - if(is.null(col.scores) | length(col.scores) < 2) - stop("Please indicate, at least, two different column-scores.") +integrate.scores <- function(data, col.scores = NULL) { + if (is.null(col.scores) | length(col.scores) < 2) + stop("Please indicate, at least, two different column-scores.") data[is.na(data)] <- 0 print(dim(data)) data$MAX <- rep(0, nrow(data)) data$HS <- rep(0, nrow(data)) - for(i in 1:nrow(data)){ + for (i in 1:nrow(data)) { data$MAX[i] <- max(data[i, col.scores]) - data$HS[i] <- sum(sort(data[i,col.scores],decreasing=T)/(1:length(data[i,col.scores]))^2) + data$HS[i] <- + sum(sort(data[i, col.scores], decreasing = T) / (1:length(data[i, col.scores])) ^ + 2) } return(data) } diff --git a/R/script_modulation_score.R b/R/script_modulation_score.R old mode 100755 new mode 100644 index 87ef250..a294f57 --- a/R/script_modulation_score.R +++ b/R/script_modulation_score.R @@ -20,71 +20,121 @@ #'@importFrom reshape2 melt #'@importFrom data.table as.data.table #'@importFrom scales rescale -modulation.score <- function(geneSets = NULL){ - if(is.null(geneSets)){ - diff_exp_gene_sets <- MIGSA::downloadEnrichrGeneSets(c('Disease_Perturbations_from_GEO_up', - 'Disease_Perturbations_from_GEO_down', - 'Single_Gene_Perturbations_from_GEO_up', - 'Single_Gene_Perturbations_from_GEO_down')) +modulation.score <- function(geneSets = NULL) { + if (is.null(geneSets)) { + diff_exp_gene_sets <- + MIGSA::downloadEnrichrGeneSets( + c( + 'Disease_Perturbations_from_GEO_up', + 'Disease_Perturbations_from_GEO_down', + 'Single_Gene_Perturbations_from_GEO_up', + 'Single_Gene_Perturbations_from_GEO_down' + ) + ) geneSets <- list() - for(i in names(diff_exp_gene_sets)){ + for (i in names(diff_exp_gene_sets)) { x <- diff_exp_gene_sets[[i]] geneSets[[i]] <- list() - is_disease<-grepl('Disease_Perturbations',i) - for(j in 1:length(x)) { - geneSets[[i]][[j]] <- list(geneIds=x[[j]]@geneIds,Description=x[[j]]@setName) - temp <- strsplit(geneSets[[i]][[j]]$Description,' ')[[1]] - idx <- length(temp)-4 - if(is_disease) { - geneSets[[i]][[j]]$Description <- tolower(paste(temp[1:(idx-1)],collapse=' ')) + is_disease <- grepl('Disease_Perturbations', i) + for (j in 1:length(x)) { + geneSets[[i]][[j]] <- + list(geneIds = x[[j]]@geneIds, + Description = x[[j]]@setName) + temp <- strsplit(geneSets[[i]][[j]]$Description, ' ')[[1]] + idx <- length(temp) - 4 + if (is_disease) { + geneSets[[i]][[j]]$Description <- + tolower(paste(temp[1:(idx - 1)], collapse = ' ')) names(geneSets[[i]])[j] <- temp[idx] } else{ - geneSets[[i]][[j]]$Description <- paste(temp[2:idx],collapse=' ') + geneSets[[i]][[j]]$Description <- paste(temp[2:idx], collapse = ' ') names(geneSets[[i]])[j] <- toupper(temp[1]) } - if(is_disease){ - names(geneSets[[i]])[grepl("^[0-9]+$",names(geneSets[[i]]))] <- - paste('DOID:',names(geneSets[[i]])[grepl("^[0-9]+$",names(geneSets[[i]]))],sep='') - names(geneSets[[i]])[grepl("^C[0-9]+$",names(geneSets[[i]]))] <- - paste('CUI:',names(geneSets[[i]])[grepl("^C[0-9]+$",names(geneSets[[i]]))],sep='') - names(geneSets[[i]])<-gsub('-',':',names(geneSets[[i]])) + if (is_disease) { + names(geneSets[[i]])[grepl("^[0-9]+$", names(geneSets[[i]]))] <- + paste('DOID:', names(geneSets[[i]])[grepl("^[0-9]+$", names(geneSets[[i]]))], sep = + '') + names(geneSets[[i]])[grepl("^C[0-9]+$", names(geneSets[[i]]))] <- + paste('CUI:', names(geneSets[[i]])[grepl("^C[0-9]+$", names(geneSets[[i]]))], sep = + '') + names(geneSets[[i]]) <- gsub('-', ':', names(geneSets[[i]])) } } } } - prtrb_specular <- lapply(seq(1,4,by=2),function(i) geneSets[c(i,i+1)]) + prtrb_specular <- + lapply(seq(1, 4, by = 2), function(i) + geneSets[c(i, i + 1)]) prtrb_specular[[2]] <- rev(prtrb_specular[[2]]) - occ.<-mapply(function(x,y) sapply(x,function(i)sapply(y,function(j) length(intersect(i$geneIds,j$geneIds)))), - prtrb_specular[[1]], prtrb_specular[[2]],SIMPLIFY=F) - names(occ.)<-c('up-down','down-up') + occ. <- + mapply( + function(x, y) + sapply(x, function(i) + sapply(y, function(j) + length( + intersect(i$geneIds, j$geneIds) + ))), + prtrb_specular[[1]], + prtrb_specular[[2]], + SIMPLIFY = F + ) + names(occ.) <- c('up-down', 'down-up') ### Calculating the composite z-scores for each disease-gene perturbation interaction - z1 <-lapply(occ.,function(x)t(apply(x,1,function(y)(y-mean(y))/sd(y)))) - z2 <-lapply(occ.,function(x)apply(x,2,function(y)(y-mean(y))/sd(y))) - Z <- mapply('+',z1,z2,SIMPLIFY = F) + z1 <- + lapply(occ., function(x) + t(apply(x, 1, function(y) + (y - mean(y))/sd(y)))) + z2 <- + lapply(occ., function(x) + apply(x, 2, function(y) + (y - mean(y))/sd(y))) + Z <- mapply('+', z1, z2, SIMPLIFY = F) ### Composite score - Z[['both']] <- (Z[[1]]+Z[[2]])/2 + Z[['both']] <- (Z[[1]] + Z[[2]]) / 2 ### Removing indexes of perturbations whose gene signatures correspond to few human orthologs - len <- lapply(geneSets,function(x)sapply(x,function(y)length(y$geneIds))) - out <- sapply(len,function(x){q<-quantile(x);q[2]-1.5*(q[4]-q[2])}) - idx <- mapply(function(x,i)x>i,len,out,SIMPLIFY=F) - idx <- lapply(seq(1,4,by=2),function(i)do.call('&',idx[c(i,i+1)])) - mat <- Z$both[idx[[2]],idx[[1]]] + len <- + lapply(geneSets, function(x) + sapply(x, function(y) + length(y$geneIds))) + out <- + sapply(len, function(x) { + q <- quantile(x) + q[2] - 1.5 * (q[4] - q[2]) + }) + idx <- mapply(function(x, i) + x > i, len, out, SIMPLIFY = F) + idx <- lapply(seq(1, 4, by = 2), function(i) + do.call('&', idx[c(i, i + 1)])) + mat <- Z$both[idx[[2]], idx[[1]]] ### Aggregating target-disease pairs by max score df <- reshape2::melt(mat, value.name = "modscore") - colnames(df)[1:2] <- c('target.id','disease.id') - annotation <- expand.grid(target.modulationType=unlist(lapply(geneSets[[3]][idx[[2]]],'[',2),use.names = F), - disease.name=unlist(lapply(geneSets[[1]][idx[[1]]],'[',2),use.names = F),KEEP.OUT.ATTRS = F) - df <- cbind(df,annotation) + colnames(df)[1:2] <- c('target.id', 'disease.id') + annotation <- + expand.grid( + target.modulationType = unlist(lapply(geneSets[[3]][idx[[2]]], '[', 2), use.names = F), + disease.name = unlist(lapply(geneSets[[1]][idx[[1]]], '[', 2), use.names = F), + KEEP.OUT.ATTRS = F + ) + df <- cbind(df, annotation) dt <- data.table::as.data.table(df) - dt <- dt[, .SD[which.max(modscore)], by=list(target.id,disease.id)] + dt <- + dt[, data.table::.SD[which.max(modscore)], by = list(target.id, disease.id)] ### Rescaling scores to [0-1] | Setting outliers above Q3 + 1.5 IQR equal to 1 q <- quantile(dt$modscore) - outliers <- q[4]+(1.5*(q[4]-q[2])) - dt$modscore <- scales::rescale(dt$modscore,from=c(min(dt$modscore),outliers),to=c(0,1)) - dt$modscore[dt$modscore>1] <- 1 + outliers <- q[4] + (1.5 * (q[4] - q[2])) + dt$modscore <- + scales::rescale(dt$modscore, + from = c(min(dt$modscore), outliers), + to = c(0, 1)) + dt$modscore[dt$modscore > 1] <- 1 ### Generating final data frame df <- as.data.frame(dt) - df[] <- lapply(df, function(x) if(is.factor(x)) as.character(x) else x) + df[] <- + lapply(df, function(x) + if (is.factor(x)) + as.character(x) + else + x) return(df) } diff --git a/R/script_node_centrality_scores.R b/R/script_node_centrality_scores.R old mode 100755 new mode 100644 index e2a1b6d..e314407 --- a/R/script_node_centrality_scores.R +++ b/R/script_node_centrality_scores.R @@ -7,8 +7,8 @@ #'@param x numeric, a vector of values. #'@param na.rm logical, whether or not to remove NA values from the calculation. #'@return The L2 norm of \code{x} -l2norm <- function (x, na.rm = TRUE) { - return(sqrt(mean(abs(x)^2, na.rm = TRUE))) +l2norm <- function(x, na.rm = TRUE) { + return(sqrt(mean(abs(x) ^ 2, na.rm = TRUE))) } #'Calculate The Geometric Mean #' @@ -19,7 +19,7 @@ l2norm <- function (x, na.rm = TRUE) { #'@param x numeric, a vector of values. #'@param na.rm logical, whether or not to remove NA values from the calculation. #'@return the geometric mean of \code{x} -geo.mean <- function (x, na.rm = TRUE) { +geo.mean <- function(x, na.rm = TRUE) { return(exp(mean(log(x), na.rm = TRUE))) } @@ -36,16 +36,24 @@ geo.mean <- function (x, na.rm = TRUE) { #'@return a list with two components:\cr #' -\strong{TopK} a matrix with 4 columns corresponding to rankings by each of the 4 aggregation functions;\cr #' -\strong{Scores} a matrix with 4 columns corresponding to the Borda scores from each of the 4 aggregation functions. -Borda <- function(input){ +Borda <- function(input) { aggreg.function = c("mean", "median", "geo.mean", "l2norm") - allranks = data.frame(matrix(0, nrow = nrow(input), ncol = length(aggreg.function))) + allranks = data.frame(matrix( + 0, + nrow = nrow(input), + ncol = length(aggreg.function) + )) names(allranks) = aggreg.function - allfun = data.frame(matrix(0, nrow = nrow(input), ncol = length(aggreg.function))) + allfun = data.frame(matrix( + 0, + nrow = nrow(input), + ncol = length(aggreg.function) + )) names(allfun) = aggreg.function for (i in 1:length(aggreg.function)) { allfun[, i] = apply(input, 1, aggreg.function[i], na.rm = TRUE) - allranks[, i] = rank(allfun[,i],ties.method ='random') + allranks[, i] = rank(allfun[, i], ties.method = 'random') } results = list(allranks, allfun) names(results) = c("TopK", "Scores") @@ -83,81 +91,137 @@ Borda <- function(input){ #'@importFrom doParallel registerDoParallel #'@importFrom foreach foreach %dopar% #'@importFrom mltools bin_data -node.centrality <- function(ppi_network, tissue_expr_data, agg_function = 4, directed_network=F, parallel = NULL, verbose = FALSE) { - if(length(rownames(tissue_expr_data))==0|length(colnames(tissue_expr_data))==0){ - stop('Both colnames and rownames for tissue_expr_data must be provided') +node.centrality <- + function(ppi_network, + tissue_expr_data, + agg_function = 4, + directed_network = F, + parallel = NULL, + verbose = FALSE) { + if (length(rownames(tissue_expr_data)) == 0 | + length(colnames(tissue_expr_data)) == 0) { + stop('Both colnames and rownames for tissue_expr_data must be provided') + } + for (i in 1:2) + ppi_network[, i] <- as.character(ppi_network[, i]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_size <- nrow(ppi_network) + if (directed_network) + idx <- ppi_network[, 1] %in% rownames(tissue_expr_data) + else + idx <- + ppi_network[, 1] %in% rownames(tissue_expr_data) & + ppi_network[, 2] %in% rownames(tissue_expr_data) + ppi_network <- ppi_network[idx, ] + if (nrow(ppi_network) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data.') + else if (ppi_network_size != nrow(ppi_network)) { + if (verbose) + print(paste( + nrow(ppi_network), + 'out of', + ppi_network_size, + 'network edges selected.', + sep = ' ' + )) + } + if (!directed_network) { + if (verbose) + print('Undirect network. Duplicating network edges...') + ppi_network_rev <- ppi_network[, c(2:1)] + colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] + ppi_network <- rbind(ppi_network[, 1:2], ppi_network_rev) + } + # rescaling z-scores into positive values + tissue_expr_scaled <- + scales::rescale(tissue_expr_data, c(.Machine$double.eps, 1)) + tissue_expr_scaled_btw <- + scales::rescale(tissue_expr_data, c(1, .Machine$double.eps)) + # building a graph from an edge list + g = igraph::graph_from_edgelist(as.matrix(ppi_network[, 1:2]), directed = + T) + # computing node degree, clustering coefficient and betweenness per tissue + node_degree <- c() + node_cc <- c() + node_btw <- c() + if (verbose) + print("Compiling node centrality scores...") + if (is.null(parallel)) { + node_degree <- lapply(colnames(tissue_expr_data), function(i) { + igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[, 1], i] + igraph::strength(g, mode = 'in') + }) + node_cc <- lapply(colnames(tissue_expr_data), function(i) { + igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[, 1], i] + igraph::transitivity(igraph::as.undirected(g), + type = 'barrat', + isolates = 'zero') + }) + node_btw <- lapply(colnames(tissue_expr_data), function(i) { + igraph::E(g)$weight <- tissue_expr_scaled_btw[ppi_network[, 1], i] + igraph::betweenness(g) + }) + } + else { + cl <- snow::makeCluster(parallel) + doParallel::registerDoParallel(cl) + `%dopar%` <- foreach::`%dopar%` + node_degree <- + foreach::foreach(i = colnames(tissue_expr_data)) %dopar% { + igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[, 1], i] + igraph::strength(g, mode = 'in') + } + names(node_degree) <- colnames(tissue_expr_data) + node_cc <- foreach(i = colnames(tissue_expr_data)) %dopar% { + igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[, 1], i] + igraph::transitivity(igraph::as.undirected(g), + type = 'barrat', + isolates = 'zero') + } + names(node_cc) <- colnames(tissue_expr_data) + node_btw <- foreach(i = colnames(tissue_expr_data)) %dopar% { + igraph::E(g)$weight <- tissue_expr_scaled_btw[ppi_network[, 1], i] + igraph::betweenness(g) + } + names(node_btw) <- colnames(tissue_expr_data) + snow::stopCluster(cl) + } + if (verbose) + print("Merging different node centrality scores...") + # aggregating individual centrality rankings with Borda (l2norm aggregation function) + node_cent_scores <- + mapply(function(x, y, z) + cbind(degree = x, cc = y, btw = z), + node_degree, + node_cc, + node_btw, + SIMPLIFY = F) + node_cent_ranks <- + lapply(node_cent_scores, function(x) + apply(x, 2, rank, ties.method = 'random')) + borda_var <- lapply(node_cent_ranks, Borda) + rank_agg_function <- + sapply(borda_var, function(x) + x$TopK[, agg_function]) + rownames(rank_agg_function) <- igraph::V(g)$name + if (verbose) + print("Discretizing the node centrality score...") + # assigning each node a discrete value between 0.25 and 1 depending on its centrality ranking quartile + chunk <- as.data.frame(apply(rank_agg_function, 2, function(x) + mltools::bin_data(x, bins = quantile(x), boundaryType = "[lorc"))) + W <- + lapply(chunk, function(x) + as.numeric(factor(x, levels = rev(levels( + x + )))) / 4) + for (i in 1:length(W)) + names(W[[i]]) <- rownames(node_cent_scores[[1]]) + return( + list( + scores = node_cent_scores, + ranks = node_cent_ranks, + borda = rank_agg_function, + borda.disc = W + ) + ) } - for(i in 1:2) ppi_network[,i]<-as.character(ppi_network[,i]) - ppi_network<-ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_size<-nrow(ppi_network) - if(directed_network) idx<-ppi_network[,1]%in%rownames(tissue_expr_data) - else idx<-ppi_network[,1]%in%rownames(tissue_expr_data) & ppi_network[,2]%in%rownames(tissue_expr_data) - ppi_network<-ppi_network[idx,] - if(nrow(ppi_network)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data.') - else if(ppi_network_size!=nrow(ppi_network)){ - if(verbose) print(paste(nrow(ppi_network),'out of',ppi_network_size,'network edges selected.', sep=' ')) - } - if(!directed_network){ - if(verbose) print('Undirect network. Duplicating network edges...') - ppi_network_rev <- ppi_network[,c(2:1)] - colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] - ppi_network <- rbind(ppi_network[,1:2],ppi_network_rev) - } - # rescaling z-scores into positive values - tissue_expr_scaled <- scales::rescale(tissue_expr_data, c(.Machine$double.eps,1)) - tissue_expr_scaled_btw <- scales::rescale(tissue_expr_data, c(1,.Machine$double.eps)) - # building a graph from an edge list - g = igraph::graph_from_edgelist(as.matrix(ppi_network[,1:2]), directed=T) - # computing node degree, clustering coefficient and betweenness per tissue - node_degree <- c() - node_cc <- c() - node_btw <- c() - if(verbose) print("Compiling node centrality scores...") - if(is.null(parallel)) { - node_degree <- lapply(colnames(tissue_expr_data),function(i){ - igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[,1], i] - igraph::strength(g, mode='in')}) - node_cc <- lapply(colnames(tissue_expr_data),function(i){ - igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[,1], i] - igraph::transitivity(igraph::as.undirected(g), type='barrat',isolates='zero')}) - node_btw <- lapply(colnames(tissue_expr_data),function(i){ - igraph::E(g)$weight <- tissue_expr_scaled_btw[ppi_network[,1],i] - igraph::betweenness(g)}) - } - else { - cl <- snow::makeCluster(parallel) - doParallel::registerDoParallel(cl) - `%dopar%` <- foreach::`%dopar%` - node_degree <- foreach::foreach(i=colnames(tissue_expr_data)) %dopar% { - igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[,1], i] - igraph::strength(g, mode='in')} - names(node_degree) <- colnames(tissue_expr_data) - node_cc <- foreach(i=colnames(tissue_expr_data)) %dopar% { - igraph::E(g)$weight <- tissue_expr_scaled[ppi_network[,1], i] - igraph::transitivity(igraph::as.undirected(g), type='barrat',isolates='zero')} - names(node_cc) <- colnames(tissue_expr_data) - node_btw <- foreach(i=colnames(tissue_expr_data)) %dopar% { - igraph::E(g)$weight <- tissue_expr_scaled_btw[ppi_network[,1],i] - igraph::betweenness(g)} - names(node_btw)<-colnames(tissue_expr_data) - snow::stopCluster(cl) - } - if(verbose) print("Merging different node centrality scores...") - # aggregating individual centrality rankings with Borda (l2norm aggregation function) - node_cent_scores <- mapply(function(x,y,z) cbind(degree=x,cc=y,btw=z), node_degree, node_cc, node_btw, SIMPLIFY=F) - node_cent_ranks <- lapply(node_cent_scores, function(x) apply(x, 2, rank, ties.method ='random')) - borda_var <- lapply(node_cent_ranks, Borda) - rank_agg_function <- sapply(borda_var, function(x)x$TopK[,agg_function]) - rownames(rank_agg_function) <- igraph::V(g)$name - if(verbose) print("Discretizing the node centrality score...") - # assigning each node a discrete value between 0.25 and 1 depending on its centrality ranking quartile - chunk <- as.data.frame(apply(rank_agg_function, 2, function(x) - mltools::bin_data(x,bins=quantile(x), boundaryType = "[lorc"))) - W <- lapply(chunk,function(x) as.numeric(factor(x, levels=rev(levels(x))))/4) - for (i in 1:length(W)) names(W[[i]]) <- rownames(node_cent_scores[[1]]) - return(list(scores = node_cent_scores, - ranks = node_cent_ranks, - borda = rank_agg_function, - borda.disc = W)) -} - diff --git a/R/script_tissue_specific_efficacy_score.R b/R/script_tissue_specific_efficacy_score.R old mode 100755 new mode 100644 index 00698b5..e0ea7cb --- a/R/script_tissue_specific_efficacy_score.R +++ b/R/script_tissue_specific_efficacy_score.R @@ -25,63 +25,109 @@ #'@importFrom Rdpack reprompt #'@importFrom igraph E graph_from_edgelist distances #'@importFrom scales rescale -weighted.shortest.path <- function(disease_genes, ppi_network, directed_network = F, - tissue_expr_data, dis_relevant_tissues, W, cutoff = 1.6, - verbose=F) { - # - if(is.null(rownames(tissue_expr_data))|is.null(colnames(tissue_expr_data))){ - stop('Both colnames and rownames for tissue_expr_data must be provided.') - } - for(i in 1:2) ppi_network[,i] <- as.character(ppi_network[,i]) - ppi_network <- ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_size <- nrow(ppi_network) - if(directed_network) idx <- ppi_network[,1]%in%rownames(tissue_expr_data) - else idx <- ppi_network[,1]%in%rownames(tissue_expr_data) & ppi_network[,2]%in%rownames(tissue_expr_data) - ppi_network <- ppi_network[idx,] - if(nrow(ppi_network)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data.') - else if(ppi_network_size!=nrow(ppi_network)){ - if(verbose) print(paste(nrow(ppi_network),'out of',ppi_network_size,'network edges selected.', sep=' ')) - } - if(!directed_network){ - #if(verbose) print('Undirect network. Converting to direct network...') - ppi_network_rev <- ppi_network[,c(2:1)] - colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] - ppi_network <- rbind(ppi_network[,1:2],ppi_network_rev) - } - disease_genes_size <- length(disease_genes) - disease_genes <- intersect(disease_genes,ppi_network[,2]) - if(length(disease_genes)==0) stop('No disease-associated ID match with ppi_network and tissue_expr_data!') - else if( disease_genes_size!=length(disease_genes)){ - if(verbose) print(paste(length(disease_genes),'disease-associated IDs are reachable from the network.',sep=' ')) - } - tissue_expr_data <- scales::rescale(tissue_expr_data,c(1,.Machine$double.eps)) - g <- igraph::graph_from_edgelist(as.matrix(ppi_network[,1:2]), directed=T) - if(is.vector(dis_relevant_tissues) == FALSE) stop('Argument dis_relevant_tissues is not a vector!') - if(is.null(names(dis_relevant_tissues))) stop('Names for dis_relevant_tissues must be provided!') - # selecting relevenat tissues - sign_tiss <- names(which(dis_relevant_tissues >= cutoff)) - if(length(sign_tiss) != 0){ - if(verbose) print(paste(length(sign_tiss),' tissue/s significant for given disease.',sep='')) - target_path_mean <- NULL - sh.path <- lapply(sign_tiss,function(i){ - igraph::E(g)$weight <- tissue_expr_data[ppi_network[,1],i] - igraph::distances(g, to=disease_genes, mode = "out", weights = NULL, algorithm = "dijkstra") - }) - names(sh.path) <- sign_tiss - sh.path <- lapply(sh.path,function(x){ - x[is.infinite(x)]<-(3*max(x[which(is.finite(x))],na.rm = TRUE)) +weighted.shortest.path <- + function(disease_genes, + ppi_network, + directed_network = F, + tissue_expr_data, + dis_relevant_tissues, + W, + cutoff = 1.6, + verbose = F) { + # + if (is.null(rownames(tissue_expr_data)) | + is.null(colnames(tissue_expr_data))) { + stop('Both colnames and rownames for tissue_expr_data must be provided.') + } + for (i in 1:2) + ppi_network[, i] <- as.character(ppi_network[, i]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_size <- nrow(ppi_network) + if (directed_network) + idx <- ppi_network[, 1] %in% rownames(tissue_expr_data) + else + idx <- + ppi_network[, 1] %in% rownames(tissue_expr_data) & + ppi_network[, 2] %in% rownames(tissue_expr_data) + ppi_network <- ppi_network[idx, ] + if (nrow(ppi_network) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data.') + else if (ppi_network_size != nrow(ppi_network)) { + if (verbose) + print(paste( + nrow(ppi_network), + 'out of', + ppi_network_size, + 'network edges selected.', + sep = ' ' + )) + } + if (!directed_network) { + #if(verbose) print('Undirect network. Converting to direct network...') + ppi_network_rev <- ppi_network[, c(2:1)] + colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] + ppi_network <- rbind(ppi_network[, 1:2], ppi_network_rev) + } + disease_genes_size <- length(disease_genes) + disease_genes <- intersect(disease_genes, ppi_network[, 2]) + if (length(disease_genes) == 0) + stop('No disease-associated ID match with ppi_network and tissue_expr_data!') + else if (disease_genes_size != length(disease_genes)) { + if (verbose) + print(paste( + length(disease_genes), + 'disease-associated IDs are reachable from the network.', + sep = ' ' + )) + } + tissue_expr_data <- + scales::rescale(tissue_expr_data, c(1, .Machine$double.eps)) + g <- + igraph::graph_from_edgelist(as.matrix(ppi_network[, 1:2]), directed = T) + if (is.vector(dis_relevant_tissues) == FALSE) + stop('Argument dis_relevant_tissues is not a vector!') + if (is.null(names(dis_relevant_tissues))) + stop('Names for dis_relevant_tissues must be provided!') + # selecting relevenat tissues + sign_tiss <- names(which(dis_relevant_tissues >= cutoff)) + if (length(sign_tiss) != 0) { + if (verbose) + print(paste( + length(sign_tiss), + ' tissue/s significant for given disease.', + sep = '' + )) + target_path_mean <- NULL + sh.path <- lapply(sign_tiss, function(i) { + igraph::E(g)$weight <- tissue_expr_data[ppi_network[, 1], i] + igraph::distances( + g, + to = disease_genes, + mode = "out", + weights = NULL, + algorithm = "dijkstra" + ) + }) + names(sh.path) <- sign_tiss + sh.path <- lapply(sh.path, function(x) { + x[is.infinite(x)] <- (3 * max(x[which(is.finite(x))], na.rm = TRUE)) x }) - # weighting shortest paths according to disease-associeted genes discrete values - new_sh.path <- mapply(function(x,y) t(x)*y[colnames(x)],sh.path, W[sign_tiss], SIMPLIFY=F) - # averaging shortest paths across the disease-associated genes and the disease-relevant tissues - target_path <- sapply(new_sh.path,colMeans) - target_path_mean <- rowMeans(target_path) - return(list(shortest_paths = target_path, - shortest_paths_avg = target_path_mean)) + # weighting shortest paths according to disease-associeted genes discrete values + new_sh.path <- + mapply(function(x, y) + t(x) * y[colnames(x)], sh.path, W[sign_tiss], SIMPLIFY = F) + # averaging shortest paths across the disease-associated genes and the disease-relevant tissues + target_path <- sapply(new_sh.path, colMeans) + target_path_mean <- rowMeans(target_path) + return(list( + shortest_paths = target_path, + shortest_paths_avg = target_path_mean + )) + } + else + stop('No tissue significant for given disease!') } - else stop('No tissue significant for given disease!') -} #'Compile tissue-specific scores for a given disease. #' @@ -115,71 +161,137 @@ weighted.shortest.path <- function(disease_genes, ppi_network, directed_network #'@export #'@importFrom Rdpack reprompt #'@importFrom scales rescale -tissue.specific.scores <- function(disease_genes, ppi_network, directed_network = F, - tissue_expr_data, dis_relevant_tissues, W, cutoff = 1.6, - verbose = FALSE) { - if(is.null(disease_genes) | is.null(tissue_expr_data) | is.null(ppi_network) | is.null(dis_relevant_tissues)) - stop('Incomplete data input.') - # compile the wsp - wsp_list = weighted.shortest.path(disease_genes,ppi_network, - directed_network,tissue_expr_data, - dis_relevant_tissues,W, cutoff, verbose) - tissue_scores <- apply(wsp_list$shortest_paths, 2, function(x) { - q <- quantile(x) - outliers <- q[4]+(1.5*(q[4]-q[2])) - ts <- scales::rescale(x, from=c(min(x), outliers), to=c(1,0)) - ts[ts< 0] <- 0 - ts - }) - dt <- data.frame(tissue_scores, avg_tissue_score = rowMeans(tissue_scores[,,drop=F])) - return(dt) -} +tissue.specific.scores <- + function(disease_genes, + ppi_network, + directed_network = F, + tissue_expr_data, + dis_relevant_tissues, + W, + cutoff = 1.6, + verbose = FALSE) { + if (is.null(disease_genes) | + is.null(tissue_expr_data) | + is.null(ppi_network) | is.null(dis_relevant_tissues)) + stop('Incomplete data input.') + # compile the wsp + wsp_list = weighted.shortest.path( + disease_genes, + ppi_network, + directed_network, + tissue_expr_data, + dis_relevant_tissues, + W, + cutoff, + verbose + ) + tissue_scores <- apply(wsp_list$shortest_paths, 2, function(x) { + q <- quantile(x) + outliers <- q[4] + (1.5 * (q[4] - q[2])) + ts <- scales::rescale(x, from = c(min(x), outliers), to = c(1, 0)) + ts[ts < 0] <- 0 + ts + }) + dt <- + data.frame(tissue_scores, avg_tissue_score = rowMeans(tissue_scores[, , drop = + F])) + return(dt) + } #'@rdname tissue.specific.scores #'@export #'@importFrom scales rescale #'@importFrom snow makeCluster stopCluster #'@importFrom doParallel registerDoParallel #'@importFrom foreach foreach %dopar% -list.tissue.specific.scores <- function(disease_gene_list, ppi_network, directed_network = F, - tissue_expr_data, dis_relevant_tissues, W, cutoff = 1.6, - verbose = FALSE, parallel = NULL) { - require(foreach) - require(doParallel) - if(is.list(disease_gene_list) == FALSE) stop('Argument disease_gene_list is not a list!') - if(is.null(names(disease_gene_list))) stop('Names for disease_gene_list must be provided!') - if(is.matrix(dis_relevant_tissues) == FALSE) stop('Argument dis_relevant_tissues is not a matrix!') - if(is.null(rownames(dis_relevant_tissues))|is.null(colnames(dis_relevant_tissues))){ - stop('Both colnames and rownames for dis_relevant_tissues must be provided!') - } - common_diseases <- intersect(names(disease_gene_list), rownames(dis_relevant_tissues)) - if(length(common_diseases)==0) stop('No disease in common between disease_gene_list and dis_relevant_tissues!') - else if(length(common_diseases) != length(names(disease_gene_list))){ - if(verbose) print(paste(length(common_diseases),'diseases in common between disease_gene_list and dis_relevant_tissues.',sep=' ')) - } - tissue_scores <- NULL - if(!is.null(parallel)) { - cl <- snow::makeCluster(parallel) - doParallel::registerDoParallel(cl) - `%dopar%` <- foreach::`%dopar%` - wsp <- foreach::foreach(i=common_diseases,.export = 'weighted.shortest.path',.final = function(i) setNames(i, common_diseases)) %dopar% - weighted.shortest.path(disease_gene_list[[i]], ppi_network, directed_network, tissue_expr_data, dis_relevant_tissues[i,], W, cutoff = 1.6) - snow::stopCluster(cl) - } - else { - warning("A parallel computation is highly recommended",immediate. = T) - wsp <- sapply(common_diseases,function(i) weighted.shortest.path(disease_gene_list[[i]], ppi_network, directed_network, tissue_expr_data, dis_relevant_tissues[i,], W, cutoff = 1.6),simplify=F) +list.tissue.specific.scores <- + function(disease_gene_list, + ppi_network, + directed_network = F, + tissue_expr_data, + dis_relevant_tissues, + W, + cutoff = 1.6, + verbose = FALSE, + parallel = NULL) { + require(foreach) + require(doParallel) + if (is.list(disease_gene_list) == FALSE) + stop('Argument disease_gene_list is not a list!') + if (is.null(names(disease_gene_list))) + stop('Names for disease_gene_list must be provided!') + if (is.matrix(dis_relevant_tissues) == FALSE) + stop('Argument dis_relevant_tissues is not a matrix!') + if (is.null(rownames(dis_relevant_tissues)) | + is.null(colnames(dis_relevant_tissues))) { + stop('Both colnames and rownames for dis_relevant_tissues must be provided!') + } + common_diseases <- + intersect(names(disease_gene_list), rownames(dis_relevant_tissues)) + if (length(common_diseases) == 0) + stop('No disease in common between disease_gene_list and dis_relevant_tissues!') + else if (length(common_diseases) != length(names(disease_gene_list))) { + if (verbose) + print( + paste( + length(common_diseases), + 'diseases in common between disease_gene_list and dis_relevant_tissues.', + sep = ' ' + ) + ) + } + tissue_scores <- NULL + if (!is.null(parallel)) { + cl <- snow::makeCluster(parallel) + doParallel::registerDoParallel(cl) + `%dopar%` <- foreach::`%dopar%` + wsp <- + foreach::foreach( + i = common_diseases, + .export = 'weighted.shortest.path', + .final = function(i) + setNames(i, common_diseases) + ) %dopar% + weighted.shortest.path( + disease_gene_list[[i]], + ppi_network, + directed_network, + tissue_expr_data, + dis_relevant_tissues[i, ], + W, + cutoff = 1.6 + ) + snow::stopCluster(cl) + } + else { + warning("A parallel computation is highly recommended", immediate. = T) + wsp <- + sapply(common_diseases, function(i) + weighted.shortest.path( + disease_gene_list[[i]], + ppi_network, + directed_network, + tissue_expr_data, + dis_relevant_tissues[i, ], + W, + cutoff = 1.6 + ), simplify = F) + } + wsp <- do.call(function(...) + mapply(list, ..., SIMPLIFY = F), wsp) + tissue_scores <- + lapply(wsp$shortest_paths, function(x) + apply(x, 2, function(y) { + q <- quantile(y) + outliers <- q[4] + (1.5 * (q[4] - q[2])) + ts <- scales::rescale(y, from = c(min(x), outliers), to = c(1, 0)) + ts[ts < 0] <- 0 + ts + })) + tissue_scores <- + lapply(tissue_scores, function(x) + data.frame(x, avg_tissue_score = rowMeans(x[, , drop = F]))) + return(tissue_scores) } - wsp <- do.call(function(...)mapply(list,...,SIMPLIFY = F),wsp) - tissue_scores <- lapply(wsp$shortest_paths, function(x) apply(x, 2, function(y) { - q <- quantile(y) - outliers <- q[4]+(1.5*(q[4]-q[2])) - ts <- scales::rescale(y, from=c(min(x), outliers), to=c(1,0)) - ts[ts< 0] <- 0 - ts - })) - tissue_scores <- lapply(tissue_scores,function(x)data.frame(x, avg_tissue_score = rowMeans(x[,,drop=F]))) - return(tissue_scores) -} #'Compile tissue-specific networks and, within these, set of genes that are relevant for the selected targets. @@ -215,60 +327,125 @@ list.tissue.specific.scores <- function(disease_gene_list, ppi_network, directed #'@importFrom igraph V #'@importFrom igraph E #'@importFrom igraph shortest_paths -build.tissue.specific.networks <- function(tissue_scores, disease_genes, ppi_network, - directed_network = F, tissue_expr_data, - top_targets = NULL, rwr_restart = 0.75, - rwr_norm = "quantile", rwr_cutoff = 0.001, - verbose = FALSE){ - # check - if(is.null(top_targets)) stop('Please specifiy a set of targets (ENTREZ ids)!') - if(!is.character(ppi_network[,1])) ppi_network[,1] <- as.character(ppi_network[,1]) - if(!is.character(ppi_network[,2])) ppi_network[,2] <- as.character(ppi_network[,2]) - ppi_network <- ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_node<-unique(unlist(ppi_network[,1:2])) - universe <- intersect(ppi_network_node,rownames(tissue_expr_data)) - if(length(universe)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data!') - else if(length(universe)!=length(ppi_network_node)| - length(universe)!=nrow(tissue_expr_data)){ - if (verbose) print(paste(length(universe),'IDs in common between ppi_network and tissue_expr_data will be considered.', sep=' ')) - tissue_expr_data<-tissue_expr_data[universe,] - } - ## scale tissue expression scores - tissue_expr_data <- scales::rescale(tissue_expr_data,c(1,.Machine$double.eps)) - ## select the sign tissues - sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] - tissue_spec_network <- list() - shp_top_genes <- list() - rwr_top_genes <- list() - for(i in 1: length(sign_tiss)) { - g <- igraph::graph_from_edgelist(as.matrix(ppi_network[,1:2]), directed=T) - igraph::E(g)$weight <- tissue_expr_data[ppi_network[,1], sign_tiss[i]] - # select disease genes active in this tissue and search for shortest paths - tissue_disease_genes <- intersect(disease_genes, igraph::V(g)$name) - shp_top_genes[[length(shp_top_genes) + 1]] <- lapply(top_targets, function(x) { - ss = igraph::distances(g, v=x, to=tissue_disease_genes, mode = "out", weights = NULL, algorithm = "dijkstra") - if(verbose) print(paste("Targets ", x, " cannot reach ", length(which(is.infinite(ss)))," nodes", sep="")) - suppressWarnings(selected_nodes <- unique(unlist(igraph::shortest_paths(g, from=x, to=tissue_disease_genes, mode = "out", weights = NULL, output='vpath')$vpath))) - igraph::V(g)$name[selected_nodes] - }) - names(shp_top_genes[[length(shp_top_genes)]]) <- top_targets - # apply random walk on the top targets - aSeeds <- rep(0, length(igraph::V(g)$name)) - aSeeds[which((igraph::V(g)$name%in%top_targets) == TRUE)] <- 1 - setSeeds <- data.frame(aSeeds) - rownames(setSeeds) <- igraph::V(g)$name - rwr_top_genes[[length(rwr_top_genes)+1]] <- as.numeric(dnet::dRWR(g, normalise = "none", setSeeds = setSeeds, restart = rwr_restart, - normalise.affinity.matrix = rwr_norm, parallel = FALSE, multicores = NULL, verbose = F)) - names(rwr_top_genes[[length(rwr_top_genes)]]) = igraph::V(g)$name - rwr_top_genes[[length(rwr_top_genes)]] = names(rwr_top_genes[[length(rwr_top_genes)]])[which(rwr_top_genes[[length(rwr_top_genes)]] >= rwr_cutoff)] - # - tissue_spec_network[[length(tissue_spec_network) + 1]] = g +build.tissue.specific.networks <- + function(tissue_scores, + disease_genes, + ppi_network, + directed_network = F, + tissue_expr_data, + top_targets = NULL, + rwr_restart = 0.75, + rwr_norm = "quantile", + rwr_cutoff = 0.001, + verbose = FALSE) { + # check + if (is.null(top_targets)) + stop('Please specifiy a set of targets (ENTREZ ids)!') + if (!is.character(ppi_network[, 1])) + ppi_network[, 1] <- as.character(ppi_network[, 1]) + if (!is.character(ppi_network[, 2])) + ppi_network[, 2] <- as.character(ppi_network[, 2]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_node <- unique(unlist(ppi_network[, 1:2])) + universe <- intersect(ppi_network_node, rownames(tissue_expr_data)) + if (length(universe) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data!') + else if (length(universe) != length(ppi_network_node) | + length(universe) != nrow(tissue_expr_data)) { + if (verbose) + print( + paste( + length(universe), + 'IDs in common between ppi_network and tissue_expr_data will be considered.', + sep = ' ' + ) + ) + tissue_expr_data <- tissue_expr_data[universe, ] + } + ## scale tissue expression scores + tissue_expr_data <- + scales::rescale(tissue_expr_data, c(1, .Machine$double.eps)) + ## select the sign tissues + sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] + tissue_spec_network <- list() + shp_top_genes <- list() + rwr_top_genes <- list() + for (i in 1:length(sign_tiss)) { + g <- + igraph::graph_from_edgelist(as.matrix(ppi_network[, 1:2]), directed = T) + igraph::E(g)$weight <- + tissue_expr_data[ppi_network[, 1], sign_tiss[i]] + # select disease genes active in this tissue and search for shortest paths + tissue_disease_genes <- + intersect(disease_genes, igraph::V(g)$name) + shp_top_genes[[length(shp_top_genes) + 1]] <- + lapply(top_targets, function(x) { + ss = igraph::distances( + g, + v = x, + to = tissue_disease_genes, + mode = "out", + weights = NULL, + algorithm = "dijkstra" + ) + if (verbose) + print(paste( + "Targets ", + x, + " cannot reach ", + length(which(is.infinite(ss))), + " nodes", + sep = "" + )) + suppressWarnings(selected_nodes <- + unique(unlist( + igraph::shortest_paths( + g, + from = x, + to = tissue_disease_genes, + mode = "out", + weights = NULL, + output = 'vpath' + )$vpath + ))) + igraph::V(g)$name[selected_nodes] + }) + names(shp_top_genes[[length(shp_top_genes)]]) <- top_targets + # apply random walk on the top targets + aSeeds <- rep(0, length(igraph::V(g)$name)) + aSeeds[which((igraph::V(g)$name %in% top_targets) == TRUE)] <- 1 + setSeeds <- data.frame(aSeeds) + rownames(setSeeds) <- igraph::V(g)$name + rwr_top_genes[[length(rwr_top_genes) + 1]] <- + as.numeric( + dnet::dRWR( + g, + normalise = "none", + setSeeds = setSeeds, + restart = rwr_restart, + normalise.affinity.matrix = rwr_norm, + parallel = FALSE, + multicores = NULL, + verbose = F + ) + ) + names(rwr_top_genes[[length(rwr_top_genes)]]) = igraph::V(g)$name + rwr_top_genes[[length(rwr_top_genes)]] = names(rwr_top_genes[[length(rwr_top_genes)]])[which(rwr_top_genes[[length(rwr_top_genes)]] >= rwr_cutoff)] + # + tissue_spec_network[[length(tissue_spec_network) + 1]] = g + } + names(rwr_top_genes) <- sign_tiss + names(shp_top_genes) <- sign_tiss + names(tissue_spec_network) <- sign_tiss + return( + list( + "tsn" = tissue_spec_network, + "shp" = shp_top_genes, + "rwr" = rwr_top_genes, + "universe" = universe + ) + ) } - names(rwr_top_genes) <- sign_tiss - names(shp_top_genes) <- sign_tiss - names(tissue_spec_network) <- sign_tiss - return(list("tsn"=tissue_spec_network, "shp" = shp_top_genes, "rwr" = rwr_top_genes, "universe" = universe)) -} #'Over-Representation Analysis. #' @@ -287,38 +464,53 @@ build.tissue.specific.networks <- function(tissue_scores, disease_genes, ppi_net #'@importFrom clusterProfiler enrichKEGG #'@importFrom clusterProfiler enrichGO #'@importFrom ReactomePA enrichPathway -generate.ora.data <- function(input, databases = c("GO", "KEGG"), - orgdb_go = 'org.Hs.eg.db', orgdb_kegg = 'hsa', - apval = 0.01, verbose = TRUE) { +generate.ora.data <- function(input, + databases = c("GO", "KEGG"), + orgdb_go = 'org.Hs.eg.db', + orgdb_kegg = 'hsa', + apval = 0.01, + verbose = TRUE) { list_annots <- list() annots <- c() - for(i in 1:length(input)) { - if("GO" %in% databases) { - list_annots[[length(list_annots) + 1]] <- clusterProfiler::enrichGO(unique(unlist(input[[i]])), - orgdb_go, ont = "BP", - pvalueCutoff = apval, - universe = input$universe) - annots <- c(annots,"GO") - if(verbose) print(paste("ORA completed for GO-", names(input)[i], ".", sep="")) + for (i in 1:length(input)) { + if ("GO" %in% databases) { + list_annots[[length(list_annots) + 1]] <- + clusterProfiler::enrichGO( + unique(unlist(input[[i]])), + orgdb_go, + ont = "BP", + pvalueCutoff = apval, + universe = input$universe + ) + annots <- c(annots, "GO") + if (verbose) + print(paste("ORA completed for GO-", names(input)[i], ".", sep = "")) } - if("KEGG" %in% databases) { - list_annots[[length(list_annots) + 1]] <- clusterProfiler::enrichKEGG(unique(unlist(input[[i]])), - organism = orgdb_kegg, - keyType = "kegg", - pvalueCutoff = apval, - universe = input$universe) - annots <- c(annots,"KEGG") - if(verbose) print(paste("ORA completed for KEGG-", names(input)[i], ".", sep="")) + if ("KEGG" %in% databases) { + list_annots[[length(list_annots) + 1]] <- + clusterProfiler::enrichKEGG( + unique(unlist(input[[i]])), + organism = orgdb_kegg, + keyType = "kegg", + pvalueCutoff = apval, + universe = input$universe + ) + annots <- c(annots, "KEGG") + if (verbose) + print(paste("ORA completed for KEGG-", names(input)[i], ".", sep = "")) } - if("REACTOME" %in% databases) { - list_annots[[length(list_annots) + 1]] <- ReactomePA::enrichPathway(unique(unlist(input[[i]])), - pvalueCutoff = apval, - readable = T) - annots <- c(annots,"REACTOME") - if(verbose) print(paste("ORA completed for REACTOME-", names(input)[i], ".", sep="")) + if ("REACTOME" %in% databases) { + list_annots[[length(list_annots) + 1]] <- + ReactomePA::enrichPathway(unique(unlist(input[[i]])), + pvalueCutoff = apval, + readable = T) + annots <- c(annots, "REACTOME") + if (verbose) + print(paste("ORA completed for REACTOME-", names(input)[i], ".", sep = + "")) } } - names(list_annots) <- paste(annots, names(input), sep="-") + names(list_annots) <- paste(annots, names(input), sep = "-") return(list_annots) } @@ -340,40 +532,60 @@ generate.ora.data <- function(input, databases = c("GO", "KEGG"), #'@importFrom enrichplot upsetplot #'@importFrom ggplot2 theme #'@importFrom ggplot2 element_text -generate.ora.plots <- function(ora_data, set_plots = c("dotplot", "emapplot", "cnetplot", "upsetplot"), - showCategory = 20, font_size = 8){ - list_plots <- list() - plots <- c() - for(i in 1:length(ora_data)) { - if("dotplot" %in% set_plots) { - list_plots[[length(list_plots)+1]] <- enrichplot::dotplot(ora_data[[i]], showCategory = showCategory, font.size = font_size) + - ggplot2::theme(legend.title=ggplot2::element_text(size=font_size), - legend.text=ggplot2::element_text(size=font_size)) - plots <- c(plots, paste("dotplot",names(ora_data)[i],sep="-")) - } - if("emapplot" %in% set_plots) { - list_plots[[length(list_plots)+1]] <- enrichplot::emapplot(ora_data[[i]], showCategory = showCategory) + - ggplot2::theme(legend.title=ggplot2::element_text(size=font_size), - legend.text=ggplot2::element_text(size=font_size)) - plots <- c(plots, paste("emapplot",names(ora_data)[i],sep="-")) - } - if("cnetplot" %in% set_plots) { - list_plots[[length(list_plots)+1]] <- enrichplot::cnetplot(ora_data[[i]], showCategory = showCategory) + - ggplot2::theme(legend.title=ggplot2::element_text(size=font_size), - legend.text=ggplot2::element_text(size=font_size)) - plots <- c(plots, paste("cnetplot",names(ora_data)[i],sep="-")) - } - if("upsetplot" %in% set_plots) { - list_plots[[length(list_plots)+1]] <- enrichplot::upsetplot(ora_data[[i]]) + - ggplot2::theme(legend.title=ggplot2::element_text(size=font_size), - legend.text=ggplot2::element_text(size=font_size)) - plots <- c(plots, paste("upsetplot",names(ora_data)[i],sep="-")) +generate.ora.plots <- + function(ora_data, + set_plots = c("dotplot", "emapplot", "cnetplot", "upsetplot"), + showCategory = 20, + font_size = 8) { + list_plots <- list() + plots <- c() + for (i in 1:length(ora_data)) { + if ("dotplot" %in% set_plots) { + list_plots[[length(list_plots) + 1]] <- + enrichplot::dotplot(ora_data[[i]], + showCategory = showCategory, + font.size = font_size) + + ggplot2::theme( + legend.title = ggplot2::element_text(size = font_size), + legend.text = ggplot2::element_text(size = font_size) + ) + plots <- c(plots, paste("dotplot", names(ora_data)[i], sep = "-")) + } + if ("emapplot" %in% set_plots) { + list_plots[[length(list_plots) + 1]] <- + enrichplot::emapplot(ora_data[[i]], showCategory = showCategory) + + ggplot2::theme( + legend.title = ggplot2::element_text(size = font_size), + legend.text = ggplot2::element_text(size = font_size) + ) + plots <- + c(plots, paste("emapplot", names(ora_data)[i], sep = "-")) + } + if ("cnetplot" %in% set_plots) { + list_plots[[length(list_plots) + 1]] <- + enrichplot::cnetplot(ora_data[[i]], showCategory = showCategory) + + ggplot2::theme( + legend.title = ggplot2::element_text(size = font_size), + legend.text = ggplot2::element_text(size = font_size) + ) + plots <- + c(plots, paste("cnetplot", names(ora_data)[i], sep = "-")) + } + if ("upsetplot" %in% set_plots) { + list_plots[[length(list_plots) + 1]] <- + enrichplot::upsetplot(ora_data[[i]]) + + ggplot2::theme( + legend.title = ggplot2::element_text(size = font_size), + legend.text = ggplot2::element_text(size = font_size) + ) + plots <- + c(plots, paste("upsetplot", names(ora_data)[i], sep = "-")) + } } + print(plots) + names(list_plots) <- plots + return(list_plots) } - print(plots) - names(list_plots) <- plots - return(list_plots) -} #'Generate PubMed Central Trend plots. @@ -392,16 +604,27 @@ generate.ora.plots <- function(ora_data, set_plots = c("dotplot", "emapplot", "c #'@importFrom ggplot2 theme #'@importFrom ggplot2 theme_minimal #'@importFrom ggplot2 element_text -novelty.plots <- function(list_genes, gene_id = 'ENTREZID', orgdb = NULL, pubmed = c(2010,2019), font_size = 8){ - pmc.genes = as.character(AnnotationDbi::mapIds(orgdb, list_genes, 'SYMBOL', gene_id)) - print(pmc.genes) - plot <- enrichplot::pmcplot(pmc.genes, pubmed[1]:pubmed[2]) + ggplot2::theme_minimal() + - ggplot2::theme(legend.title=ggplot2::element_text(size=font_size), - legend.text=element_text(size=font_size), - axis.text.x = element_text(size=font_size), - axis.text.y = element_text(size=font_size)) - return(plot) -} +novelty.plots <- + function(list_genes, + gene_id = 'ENTREZID', + orgdb = NULL, + pubmed = c(2010, 2019), + font_size = 8) { + pmc.genes = as.character(AnnotationDbi::mapIds(orgdb, list_genes, 'SYMBOL', gene_id)) + print(pmc.genes) + plot <- + enrichplot::pmcplot(pmc.genes, pubmed[1]:pubmed[2]) + ggplot2::theme_minimal() + + ggplot2::theme( + legend.title = ggplot2::element_text(size = font_size), + legend.text = element_text(size = + font_size), + axis.text.x = element_text(size = + font_size), + axis.text.y = element_text(size = + font_size) + ) + return(plot) + } #'Interactive visualization of tissue-specific networks. #' @@ -422,132 +645,213 @@ novelty.plots <- function(list_genes, gene_id = 'ENTREZID', orgdb = NULL, pubmed #'Possible values are: \code{kegg}, \code{BP}, \code{MF} and \code{CC}. Defaults to \code{kegg}. #'@param verbose logical indicating whether the messages will be displayed or not in the screen. #'@importFrom AnnotationDbi mapIds -visualize.graph <- function(tissue_scores, disease_genes, ppi_network, directed_network = FALSE, - tissue_expr_data, top_target = NULL, db='kegg', verbose = FALSE){ - if(is.null(top_target)) stop('Please specifiy a set of targets (ENTREZ ids)!') - if(is.null(rownames(tissue_expr_data))|is.null(colnames(tissue_expr_data))){ - stop('Both colnames and rownames for tissue_expr_data must be provided.') - } - for(i in 1:2) ppi_network[,i] <- as.character(ppi_network[,i]) - ppi_network <- ppi_network[!duplicated(ppi_network[,1:2]),] - ppi_network_size <- nrow(ppi_network) - if(directed_network) idx <- ppi_network[,1]%in%rownames(tissue_expr_data) - else idx <- ppi_network[,1]%in%rownames(tissue_expr_data) & ppi_network[,2]%in%rownames(tissue_expr_data) - ppi_network <- ppi_network[idx,] - if(nrow(ppi_network)==0) stop('No corresponding IDs between ppi_network and tissue_expr_data.') - else if(ppi_network_size!=nrow(ppi_network)){ - if(verbose) print(paste(nrow(ppi_network),'out of',ppi_network_size,'network edges selected.', sep=' ')) - } - if(!directed_network){ - if(verbose) print('Undirect network. Converting to direct network...') - ppi_network_rev <- ppi_network[,c(2:1)] - colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] - ppi_network <- rbind(ppi_network[,1:2],ppi_network_rev) - } - disease_genes_size <- length(disease_genes) - disease_genes <- intersect(disease_genes,ppi_network[,2]) - if(length(disease_genes)==0) stop('No disease-associated ID match with ppi_network and tissue_expr_data!') - else if( disease_genes_size!=length(disease_genes)){ - if(verbose) print(paste(length(disease_genes),'disease-associated IDs are reachable from the network.',sep=' ')) - } - if(!(db %in% c('kegg','BP','MF','CC'))) stop('Unused value for argument db') - idx <- order(tissue_scores$avg_tissue_score,decreasing=T) - tissue_scores <- tissue_scores[idx,] - g <- igraph::graph_from_edgelist(as.matrix(ppi_network[,1:2]), directed=T) - tissue_expr_data <- scales::rescale(tissue_expr_data,c(1,.Machine$double.eps)) - sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] - sh.path <- lapply(sign_tiss,function(i){ - igraph::E(g)$weight <- tissue_expr_data[ppi_network[,1],i] - lapply(top_target,function(x) - igraph::shortest_paths(g, from=x, to=disease_genes, mode = "out", weights = NULL,output='epath')$epath) - }) - names(sh.path) <- sign_tiss - for(i in names(sh.path)) names(sh.path[[i]]) <- top_target - all_target_path <-lapply(sh.path,function(x)lapply(x,function(y)do.call(c,y))) - all_tissue_path <- lapply(all_target_path,function(x)do.call(c,x)) - ids <- lapply(all_tissue_path,igraph::as_ids) - shinyApp( - ui = fluidPage( - fluidRow( - column( - width = 2, - checkboxGroupInput("tissue","Select tissue/s",sign_tiss,selected = sign_tiss[1]) +visualize.graph <- + function(tissue_scores, + disease_genes, + ppi_network, + directed_network = FALSE, + tissue_expr_data, + top_target = NULL, + db = 'kegg', + verbose = FALSE) { + if (is.null(top_target)) + stop('Please specifiy a set of targets (ENTREZ ids)!') + if (is.null(rownames(tissue_expr_data)) | + is.null(colnames(tissue_expr_data))) { + stop('Both colnames and rownames for tissue_expr_data must be provided.') + } + for (i in 1:2) + ppi_network[, i] <- as.character(ppi_network[, i]) + ppi_network <- ppi_network[!duplicated(ppi_network[, 1:2]), ] + ppi_network_size <- nrow(ppi_network) + if (directed_network) + idx <- ppi_network[, 1] %in% rownames(tissue_expr_data) + else + idx <- + ppi_network[, 1] %in% rownames(tissue_expr_data) & + ppi_network[, 2] %in% rownames(tissue_expr_data) + ppi_network <- ppi_network[idx, ] + if (nrow(ppi_network) == 0) + stop('No corresponding IDs between ppi_network and tissue_expr_data.') + else if (ppi_network_size != nrow(ppi_network)) { + if (verbose) + print(paste( + nrow(ppi_network), + 'out of', + ppi_network_size, + 'network edges selected.', + sep = ' ' + )) + } + if (!directed_network) { + if (verbose) + print('Undirect network. Converting to direct network...') + ppi_network_rev <- ppi_network[, c(2:1)] + colnames(ppi_network_rev) <- colnames(ppi_network)[1:2] + ppi_network <- rbind(ppi_network[, 1:2], ppi_network_rev) + } + disease_genes_size <- length(disease_genes) + disease_genes <- intersect(disease_genes, ppi_network[, 2]) + if (length(disease_genes) == 0) + stop('No disease-associated ID match with ppi_network and tissue_expr_data!') + else if (disease_genes_size != length(disease_genes)) { + if (verbose) + print(paste( + length(disease_genes), + 'disease-associated IDs are reachable from the network.', + sep = ' ' + )) + } + if (!(db %in% c('kegg', 'BP', 'MF', 'CC'))) + stop('Unused value for argument db') + idx <- order(tissue_scores$avg_tissue_score, decreasing = T) + tissue_scores <- tissue_scores[idx, ] + g <- + igraph::graph_from_edgelist(as.matrix(ppi_network[, 1:2]), directed = T) + tissue_expr_data <- + scales::rescale(tissue_expr_data, c(1, .Machine$double.eps)) + sign_tiss <- colnames(tissue_scores)[-ncol(tissue_scores)] + sh.path <- lapply(sign_tiss, function(i) { + igraph::E(g)$weight <- tissue_expr_data[ppi_network[, 1], i] + lapply(top_target, function(x) + igraph::shortest_paths( + g, + from = x, + to = disease_genes, + mode = "out", + weights = NULL, + output = 'epath' + )$epath) + }) + names(sh.path) <- sign_tiss + for (i in names(sh.path)) + names(sh.path[[i]]) <- top_target + all_target_path <- + lapply(sh.path, function(x) + lapply(x, function(y) + do.call(c, y))) + all_tissue_path <- lapply(all_target_path, function(x) + do.call(c, x)) + ids <- lapply(all_tissue_path, igraph::as_ids) + + require(shiny) + require(visNetwork) + shinyApp( + ui = fluidPage( + fluidRow( + column( + width = 2, + checkboxGroupInput("tissue", "Select tissue/s", sign_tiss, selected = sign_tiss[1]) + ), + column(width = 10, + visNetworkOutput("network")) ), - column( - width = 10, - visNetworkOutput("network") - ) - ), - fluidRow( - column( - width = 12, - tableOutput("table") - ) + fluidRow(column(width = 12, + tableOutput("table"))), + textOutput("text") ), - textOutput("text") - ), - server = function(input, output,session) { - last_id <- NULL - output$network <- renderVisNetwork({ - validate(need(input$tissue!='','Please select one or more tissues')) - if(length(input$tissue)>1){ - edge <- strsplit(Reduce(intersect,ids[input$tissue]),split='|',fixed=T) - } - else if (length(input$tissue) ==1){ - edge <- strsplit(ids[[input$tissue]],split='|',fixed=T) - } - edge <- as.data.frame(do.call(rbind,edge)) - edge <- edge[!duplicated(edge),] - colnames(edge) <- c('from','to') - edge$width <- scales::rescale(rowMeans(1-tissue_expr_data[edge$from,input$tissue,drop=F]),c(1,5)) - edge$arrows <- 'to' - nb <- unique(unlist(edge[,1:2])) - node <- data.frame(id=nb,label=nb,stringsAsFactors = F) - node$title <- AnnotationDbi::mapIds(org.Hs.eg.db,as.character(nb),'SYMBOL','ENTREZID', verbose = FALSE) - node$size <- rowMeans(tissue_scores[nb,input$tissue,drop=F])*20 - gp <- rep('bridge gene',nrow(node)) - gp[node$label%in%top_target] <- 'target gene' - gp[node$label%in%disease_genes] <- 'disease gene' - node$group <- gp - node$shape <- rep('dot',length(gp)) - node$shape[gp=='target gene'] <- 'triangle' - node$shape[gp=='disease gene'] <- 'star' - visNetwork::visNetwork(node, edge, height = "1500px", width = "500%") %>% - visGroups(groupname = "target gene", color = list(background = "skyblue", border = "deepskyblue")) %>% - visGroups(groupname = "disease gene", color = list(background = "lightcoral", border = "red")) %>% - visGroups(groupname = "bridge gene", shape="dot") %>% - visLegend(addNodes=list( - list(label = "disease gene", shape = "star", - color = list(background = "lightcoral", border = "red")), - list(label = "target gene", shape = "triangle", - color = list(background = "skyblue", border = "deepskyblue")), - list(label = "bridge gene", shape = "dot")), - useGroups = FALSE,width=0.1) %>% - visIgraphLayout(layout = "layout_with_sugiyama") %>% - visEvents(select = "function(nodes) { + server = function(input, output, session) { + last_id <- NULL + output$network <- renderVisNetwork({ + validate(need(input$tissue != '', 'Please select one or more tissues')) + if (length(input$tissue) > 1) { + edge <- + strsplit(Reduce(intersect, ids[input$tissue]), + split = '|', + fixed = T) + } + else if (length(input$tissue) == 1) { + edge <- strsplit(ids[[input$tissue]], split = '|', fixed = T) + } + edge <- as.data.frame(do.call(rbind, edge)) + edge <- edge[!duplicated(edge), ] + colnames(edge) <- c('from', 'to') + edge$width <- + scales::rescale(rowMeans(1 - tissue_expr_data[edge$from, input$tissue, drop = + F]), c(1, 5)) + edge$arrows <- 'to' + nb <- unique(unlist(edge[, 1:2])) + node <- data.frame(id = nb, + label = nb, + stringsAsFactors = F) + node$title <- + AnnotationDbi::mapIds(org.Hs.eg.db, + as.character(nb), + 'SYMBOL', + 'ENTREZID', + verbose = FALSE) + node$size <- + rowMeans(tissue_scores[nb, input$tissue, drop = F]) * 20 + gp <- rep('bridge gene', nrow(node)) + gp[node$label %in% top_target] <- 'target gene' + gp[node$label %in% disease_genes] <- 'disease gene' + node$group <- gp + node$shape <- rep('dot', length(gp)) + node$shape[gp == 'target gene'] <- 'triangle' + node$shape[gp == 'disease gene'] <- 'star' + visNetwork::visNetwork(node, edge, height = "1500px", width = "500%") %>% + visGroups( + groupname = "target gene", + color = list(background = "skyblue", border = "deepskyblue") + ) %>% + visGroups( + groupname = "disease gene", + color = list(background = "lightcoral", border = "red") + ) %>% + visGroups(groupname = "bridge gene", shape = "dot") %>% + visLegend( + addNodes = list( + list( + label = "disease gene", + shape = "star", + color = list(background = "lightcoral", border = "red") + ), + list( + label = "target gene", + shape = "triangle", + color = list(background = "skyblue", border = "deepskyblue") + ), + list(label = "bridge gene", shape = "dot") + ), + useGroups = FALSE, + width = 0.1 + ) %>% + visIgraphLayout(layout = "layout_with_sugiyama") %>% + visEvents(select = "function(nodes) { Shiny.onInputChange('current_node_id', nodes.nodes); ;}") - }) - output$table <- renderTable({ - res = NULL - if (is.null(last_id) | - !identical(last_id,input$current_node_id) ) { - validate(need(input$current_node_id%in%top_target,'Please select a target gene')) - last_id <<- input$current_node_id - current_target_path <- sapply(all_target_path[input$tissue],function(x) x[input$current_node_id]) - current_target_path <- Reduce(igraph::intersection,current_target_path) - target_interactors <- unique(c(igraph::ends(g,current_target_path))) - if(db=='kegg') res <- clusterProfiler::enrichKEGG(target_interactors,organism = 'hsa')@result - else res <- clusterProfiler::enrichGO(target_interactors, 'org.Hs.eg.db', ont = db)@result - res <- res[res$p.adjust<0.05,] - if (nrow(res) > 25) res <- res[1:25,] - res - } - else{ - validate(need(!is.null(res),'Please select a target gene')) - } - }) - } - ) + }) + output$table <- renderTable({ + res = NULL + if (is.null(last_id) | + !identical(last_id, input$current_node_id)) { + validate(need( + input$current_node_id %in% top_target, + 'Please select a target gene' + )) + last_id <<- input$current_node_id + current_target_path <- + sapply(all_target_path[input$tissue], function(x) + x[input$current_node_id]) + current_target_path <- + Reduce(igraph::intersection, current_target_path) + target_interactors <- + unique(c(igraph::ends(g, current_target_path))) + if (db == 'kegg') + res <- + clusterProfiler::enrichKEGG(target_interactors, organism = 'hsa')@result + else + res <- + clusterProfiler::enrichGO(target_interactors, 'org.Hs.eg.db', ont = db)@result + res <- res[res$p.adjust < 0.05, ] + if (nrow(res) > 25) + res <- res[1:25, ] + res + } + else{ + validate(need(!is.null(res), 'Please select a target gene')) + } + }) + } + ) } - diff --git a/README.md b/README.md index 7f79662..e2c21a5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ThETA ThETA: Transcriptome-driven Efficacy estimates for gene-based TArget discovery -This R package was built to provide the implementation of novel trasncriptome-based efficacy scores of target(gene)-disease associations, which have been recentely described in [Failli et al. 2019](https://www.nature.com/articles/s41598-019-46293-7). It provides utility functions to recompile the scores based on the selection of different disease-relevant gene sets and tissue-specific gene expresstion profiles. Morevoer, it provides the users with an easy access to the disease-gene association scores compiled by the [Open Target platform](https://www.targetvalidation.org/) and functions to merge the OT-based scores with our novel efficacy scores in order to provide a final prioritization of target(gene)-disease associations. Finally, the users can run basic visualization functions in order to visualize the tissue-specific gene networks and biological annotations associated to top drug targets (or genes) and closely related genes slected with a random walk algorithm. +This R package was built to provide the implementation of novel trasncriptome-based efficacy scores of target(gene)-disease associations, which have been recentely described in [Failli et al. 2019](https://www.nature.com/articles/s41598-019-46293-7). It provides utility functions to recompile the scores based on the selection of different disease-relevant gene sets and tissue-specific gene expresstion profiles. Morevoer, it provides the users with an easy access to the disease-gene association scores compiled by the [Open Targets platform](https://www.targetvalidation.org/) and functions to merge the OT-based scores with our novel efficacy scores in order to provide a final prioritization of target(gene)-disease associations. Finally, the users can run basic visualization functions in order to visualize the tissue-specific gene networks and biological annotations associated to top drug targets (or genes) and closely related genes slected with a random walk algorithm. ## Installation diff --git a/ThETA.Rproj b/ThETA.Rproj index 21a4da0..497f8bf 100644 --- a/ThETA.Rproj +++ b/ThETA.Rproj @@ -12,6 +12,9 @@ Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/build/partial.rdb b/build/partial.rdb deleted file mode 100644 index bd62c6d..0000000 Binary files a/build/partial.rdb and /dev/null differ diff --git a/build/vignette.rds b/build/vignette.rds deleted file mode 100644 index 266149b..0000000 Binary files a/build/vignette.rds and /dev/null differ diff --git a/data/centrality_score.rda b/data/centrality_score.rda old mode 100755 new mode 100644 diff --git a/data/dis_vrnts.rda b/data/dis_vrnts.rda old mode 100755 new mode 100644 diff --git a/data/disease_tissue_zscores.rda b/data/disease_tissue_zscores.rda old mode 100755 new mode 100644 diff --git a/data/geo_gene_sets.rda b/data/geo_gene_sets.rda old mode 100755 new mode 100644 diff --git a/data/gtexv7_zscore.rda b/data/gtexv7_zscore.rda old mode 100755 new mode 100644 diff --git a/data/ppi_strdb_700.rda b/data/ppi_strdb_700.rda old mode 100755 new mode 100644 diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib old mode 100755 new mode 100644 diff --git a/inst/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz b/inst/extdata/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz old mode 100755 new mode 100644 similarity index 100% rename from inst/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz rename to inst/extdata/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz diff --git a/inst/conversion_enrichr_efo.csv b/inst/extdata/conversion_enrichr_efo.csv old mode 100755 new mode 100644 similarity index 100% rename from inst/conversion_enrichr_efo.csv rename to inst/extdata/conversion_enrichr_efo.csv diff --git a/inst/efo_feb2019.obo b/inst/extdata/efo_feb2019.obo old mode 100755 new mode 100644 similarity index 100% rename from inst/efo_feb2019.obo rename to inst/extdata/efo_feb2019.obo diff --git a/man/Borda.Rd b/man/Borda.Rd old mode 100755 new mode 100644 diff --git a/man/centrality_score.Rd b/man/centrality_score.Rd old mode 100755 new mode 100644 diff --git a/man/dis_vrnts.Rd b/man/dis_vrnts.Rd old mode 100755 new mode 100644 diff --git a/man/disease_tissue_zscores.Rd b/man/disease_tissue_zscores.Rd old mode 100755 new mode 100644 diff --git a/man/geo.mean.Rd b/man/geo.mean.Rd old mode 100755 new mode 100644 diff --git a/man/geo_gene_sets.Rd b/man/geo_gene_sets.Rd old mode 100755 new mode 100644 diff --git a/man/gtexv7_zscore.Rd b/man/gtexv7_zscore.Rd old mode 100755 new mode 100644 diff --git a/man/integrate.scores.Rd b/man/integrate.scores.Rd old mode 100755 new mode 100644 diff --git a/man/l2norm.Rd b/man/l2norm.Rd old mode 100755 new mode 100644 diff --git a/man/ppi_strdb_700.Rd b/man/ppi_strdb_700.Rd old mode 100755 new mode 100644 diff --git a/vignettes/DataProcessing.Rmd b/vignettes/DataProcessing.Rmd old mode 100755 new mode 100644 index 64ae9e3..c17a288 --- a/vignettes/DataProcessing.Rmd +++ b/vignettes/DataProcessing.Rmd @@ -139,7 +139,7 @@ library(CePa) Then, the function *read.gct()* is utilized to read the GTEx file. ```{r, load_gtex, cache.lazy = TRUE, echo = TRUE, eval = FALSE} -gtexv7_median_tpm <- CePa::read.gct('../data/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz') +gtexv7_median_tpm <- CePa::read.gct(system.file("extdata", 'GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_median_tpm.gct.gz', package="ThETA")) colnames(gtexv7_median_tpm) <- sub("\\.$",'',colnames(gtexv7_median_tpm)) colnames(gtexv7_median_tpm) <- gsub("\\.+",'_',colnames(gtexv7_median_tpm)) ``` @@ -210,7 +210,7 @@ if("ontologyIndex" %in% rownames(installed.packages()) == FALSE) {install.packag library(ontologyIndex) # select the genes associated with each disease annotated in the OBO file -efo.OBO <- get_OBO('../data/efo_feb2019.obo', extract_tags = "everything") +efo.OBO <- get_OBO(system.file("extdata", 'efo_feb2019.obo', package="ThETA"), extract_tags = "everything") # remove the tag "EFO:" from each id efo_ids <- gsub('EFO:','', grep('EFO:', efo.OBO$id, value = T)) # select gene-disease associations from DisGeNET diff --git a/vignettes/DataProcessing_cache/html/__packages b/vignettes/DataProcessing_cache/html/__packages index bc43695..5c2796e 100644 --- a/vignettes/DataProcessing_cache/html/__packages +++ b/vignettes/DataProcessing_cache/html/__packages @@ -11,4 +11,3 @@ bitops RCurl XML kableExtra -BiocStyle diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd old mode 100755 new mode 100644 index d4d5fe7..8059afa --- a/vignettes/Introduction.Rmd +++ b/vignettes/Introduction.Rmd @@ -118,10 +118,10 @@ T2DM_genes = dis_vrnts[[which(names(dis_vrnts) == "EFO:0001360")]] T2DM_rel_tissue_scores = disease_tissue_zscores$z[which(rownames(disease_tissue_zscores$z) == "EFO:0001360"),] ``` -3. A tissue-specific efficacy (TSE) score is then estimated for all genes that are expressed in the tissues that are relevant for T2D. +3. A tissue-specific efficacy (TSE) score is then estimated for all genes that are expressed in the tissues that are relevant for T2D. It should be noted that the following script is computer-intensive. Indeed, we specified only two genes in input. However, it is highly recommended to use the whole set of T2D-relevant genes. ```{r, t2d_sco, cache.lazy = TRUE, echo = TRUE, results = 'hide'} -T2DM_Tscores <- tissue.specific.scores(T2DM_genes$entrez, +T2DM_Tscores <- tissue.specific.scores(T2DM_genes$entrez[1:2], ppi_network = ppi_strdb_700, directed_network = FALSE, tissue_expr_data = gtexv7_zscore, @@ -211,9 +211,9 @@ The following code shows how to: First, DO ids or CUIs are replaced with EFO ids. ```{r, load_conf, cache.lazy = TRUE, echo = TRUE} -enrichr_to_efo <- read.csv(system.file("conversion_enrichr_efo.csv", +enrichr_to_efo <- read.csv(system.file("extdata", "conversion_enrichr_efo.csv", package = "ThETA"), row.names = 1, - stringsAsFactors = F) + stringsAsFactors = FALSE) modulation_scores$disease.id <- enrichr_to_efo[modulation_scores$disease.id,'disease.id'] ``` diff --git a/vignettes/Introduction_cache/html/__packages b/vignettes/Introduction_cache/html/__packages deleted file mode 100644 index 3572a81..0000000 --- a/vignettes/Introduction_cache/html/__packages +++ /dev/null @@ -1,20 +0,0 @@ -base -ThETA -kableExtra -ggplot2 -BiocGenerics -Biobase -S4Vectors -IRanges -AnnotationDbi -org.Hs.eg.db -httr -jsonlite -dplyr -MeSHDbi -MeSH.db -bitops -RCurl -XML -BiocStyle -reshape