Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
..Rcheck
^.*\.Rproj$
^\.Rproj\.user$
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
# produced vignettes
vignettes/*.html
vignettes/*.pdf
vignettes/*_cache/*
vignettes/*.R

.build.timestamp
build/

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
Expand Down
Empty file modified DESCRIPTION
100755 → 100644
Empty file.
Empty file modified NAMESPACE
100755 → 100644
Empty file.
Empty file modified R/data_description.R
100755 → 100644
Empty file.
619 changes: 391 additions & 228 deletions R/script_disease_relevant_tissues.R
100755 → 100644

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions R/script_integration_efficacy_scores.R
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@
#'using max and harmonic sum functions (see \insertRef{Failli2019}{ThETA} for details).
#'@export
#'@importFrom Rdpack reprompt
integrate.scores <- function(data, col.scores = NULL) {
  # Adds two integrated columns to `data`:
  #   MAX - the largest of the selected scores in each row;
  #   HS  - the harmonic sum of the selected scores in each row, i.e. the
  #         scores sorted in decreasing order and divided by the square of
  #         their rank (1, 4, 9, ...), then summed.
  #
  # data:       data frame holding one score per column.
  # col.scores: names or indices of at least two score columns of `data`.
  # Returns `data` with every NA replaced by 0 and the MAX and HS columns
  # appended.

  # Scalar condition: `||` short-circuits, so length() is only consulted
  # when col.scores is not NULL.
  if (is.null(col.scores) || length(col.scores) < 2) {
    stop("Please indicate, at least, two different column-scores.")
  }

  # Missing values are treated as a score of 0. Note that this is applied
  # to the whole data frame, not only to the score columns.
  data[is.na(data)] <- 0

  # Work on a plain matrix so that each row is an atomic vector; sorting
  # one-row data frames depends on xtfrm() of a data frame, which recent R
  # versions warn about or reject.
  scores <- as.matrix(data[, col.scores, drop = FALSE])
  if (!is.numeric(scores) && !is.logical(scores)) {
    stop("The columns selected by 'col.scores' must be numeric.")
  }

  squared_ranks <- seq_len(ncol(scores))^2
  rows <- seq_len(nrow(scores))  # empty for zero-row input

  data$MAX <- vapply(rows, function(i) max(scores[i, ]), numeric(1))
  data$HS <- vapply(
    rows,
    function(i) sum(sort(scores[i, ], decreasing = TRUE) / squared_ranks),
    numeric(1)
  )
  data
}
Expand Down
136 changes: 93 additions & 43 deletions R/script_modulation_score.R
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -20,71 +20,121 @@
#'@importFrom reshape2 melt
#'@importFrom data.table as.data.table
#'@importFrom scales rescale
modulation.score <- function(geneSets = NULL) {
  # Scores target-disease pairs by how strongly the gene signature of a
  # single-gene perturbation overlaps the opposite-direction signature of a
  # disease perturbation.
  #
  # geneSets: NULL (default) to download the four Enrichr GEO perturbation
  #           libraries through MIGSA, or a list of four named lists in the
  #           order disease-up, disease-down, gene-up, gene-down. Every set
  #           is a list whose first element is `geneIds` and whose second
  #           element is a description (the second element is read by
  #           position further below).
  # Returns a data frame with the columns target.id, disease.id, modscore
  # (rescaled to [0, 1]), target.modulationType and disease.name; factor
  # columns are converted to character.

  if (is.null(geneSets)) {
    libraries <- c(
      'Disease_Perturbations_from_GEO_up',
      'Disease_Perturbations_from_GEO_down',
      'Single_Gene_Perturbations_from_GEO_up',
      'Single_Gene_Perturbations_from_GEO_down'
    )
    diff_exp_gene_sets <- MIGSA::downloadEnrichrGeneSets(libraries)

    geneSets <- list()
    for (lib in names(diff_exp_gene_sets)) {
      sets <- diff_exp_gene_sets[[lib]]
      geneSets[[lib]] <- list()
      is_disease <- grepl('Disease_Perturbations', lib)

      # seq_along() keeps the loop from running when a library is empty.
      for (j in seq_along(sets)) {
        geneSets[[lib]][[j]] <- list(
          geneIds = sets[[j]]@geneIds,
          Description = sets[[j]]@setName
        )
        # The set name is split on spaces; the last four tokens are dropped.
        # NOTE(review): presumably these are trailing metadata tokens of the
        # Enrichr set name - confirm against the library format.
        tokens <- strsplit(geneSets[[lib]][[j]]$Description, ' ')[[1]]
        last <- length(tokens) - 4
        if (is_disease) {
          # Disease sets: token `last` becomes the set name, everything
          # before it becomes the lower-cased description.
          geneSets[[lib]][[j]]$Description <-
            tolower(paste(tokens[1:(last - 1)], collapse = ' '))
          names(geneSets[[lib]])[j] <- tokens[last]
        } else {
          # Single-gene sets: the first token (upper-cased) becomes the set
          # name, tokens 2..last become the description.
          geneSets[[lib]][[j]]$Description <-
            paste(tokens[2:last], collapse = ' ')
          names(geneSets[[lib]])[j] <- toupper(tokens[1])
        }
      }

      # Normalise disease identifiers once per library: purely numeric names
      # get a 'DOID:' prefix, 'C<digits>' names get a 'CUI:' prefix, and '-'
      # is replaced by ':'. Each rule leaves already-normalised names
      # untouched, so a single pass gives the same names as repeating the
      # rules after every inserted set.
      ids <- names(geneSets[[lib]])
      if (is_disease && length(ids) > 0) {
        is_numeric_id <- grepl("^[0-9]+$", ids)
        ids[is_numeric_id] <- paste0('DOID:', ids[is_numeric_id])
        is_cui_id <- grepl("^C[0-9]+$", ids)
        ids[is_cui_id] <- paste0('CUI:', ids[is_cui_id])
        names(geneSets[[lib]]) <- gsub('-', ':', ids)
      }
    }
  }

  # Pair the disease libraries (elements 1-2) with the gene libraries
  # (elements 3-4) in reversed order, so that disease-up meets gene-down and
  # disease-down meets gene-up.
  prtrb_specular <- lapply(seq(1, 4, by = 2), function(i) geneSets[c(i, i + 1)])
  prtrb_specular[[2]] <- rev(prtrb_specular[[2]])

  # For each pairing, count the genes shared by every (gene set, disease set)
  # combination; rows follow the gene sets, columns the disease sets.
  occ. <- mapply(
    function(x, y) {
      sapply(x, function(i) {
        sapply(y, function(j) length(intersect(i$geneIds, j$geneIds)))
      })
    },
    prtrb_specular[[1]],
    prtrb_specular[[2]],
    SIMPLIFY = FALSE
  )
  names(occ.) <- c('up-down', 'down-up')

  ### Calculating the composite z-scores for each disease-gene perturbation interaction
  standardise <- function(y) (y - mean(y)) / sd(y)
  z1 <- lapply(occ., function(x) t(apply(x, 1, standardise)))  # row-wise
  z2 <- lapply(occ., function(x) apply(x, 2, standardise))     # column-wise
  Z <- mapply('+', z1, z2, SIMPLIFY = FALSE)

  ### Composite score
  Z[['both']] <- (Z[[1]] + Z[[2]]) / 2

  ### Removing indexes of perturbations whose gene signatures correspond to few human orthologs
  len <- lapply(geneSets, function(x) {
    vapply(x, function(y) length(y$geneIds), integer(1))
  })
  # Lower fence Q1 - 1.5 * IQR of the signature sizes of each library.
  out <- vapply(
    len,
    function(x) {
      q <- quantile(x)
      unname(q[2] - 1.5 * (q[4] - q[2]))
    },
    numeric(1)
  )
  keep <- mapply(function(x, i) x > i, len, out, SIMPLIFY = FALSE)
  # A perturbation is kept only if both its up and its down signature pass.
  keep <- lapply(seq(1, 4, by = 2), function(i) do.call('&', keep[c(i, i + 1)]))
  # drop = FALSE keeps a matrix even when a single row or column survives.
  mat <- Z$both[keep[[2]], keep[[1]], drop = FALSE]

  ### Aggregating target-disease pairs by max score
  df <- reshape2::melt(mat, value.name = "modscore")
  colnames(df)[1:2] <- c('target.id', 'disease.id')
  # The second element of every set (its description) is taken by position.
  annotation <- expand.grid(
    target.modulationType =
      unlist(lapply(geneSets[[3]][keep[[2]]], '[', 2), use.names = FALSE),
    disease.name =
      unlist(lapply(geneSets[[1]][keep[[1]]], '[', 2), use.names = FALSE),
    KEEP.OUT.ATTRS = FALSE
  )
  df <- cbind(df, annotation)
  dt <- data.table::as.data.table(df)
  # `.SD` has to stay unqualified: the symbol exported by data.table is only
  # a NULL placeholder, so `data.table::.SD[...]` would yield NULL for every
  # group. An R CMD check note about `.SD` can be silenced with
  # `@importFrom data.table .SD`.
  dt <- dt[, .SD[which.max(modscore)], by = list(target.id, disease.id)]

  ### Generating final data frame
  df <- as.data.frame(dt)

  ### Rescaling scores to [0-1] | Setting outliers above Q3 + 1.5 IQR equal to 1
  q <- quantile(df$modscore)
  outliers <- q[4] + (1.5 * (q[4] - q[2]))
  df$modscore <- scales::rescale(
    df$modscore,
    from = c(min(df$modscore), outliers),
    to = c(0, 1)
  )
  df$modscore[df$modscore > 1] <- 1

  df[] <- lapply(df, function(x) if (is.factor(x)) as.character(x) else x)
  df
}
Loading