UEFBiomedicalInformaticsLab · juuussi · Nov 19, 2019 · Nov 19, 2019
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,3 +1,2 @@
-..Rcheck
 ^.*\.Rproj$
 ^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -20,6 +20,11 @@
 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf
+vignettes/*_cache/*
+vignettes/*.R
+
+.build.timestamp
+build/
 
 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 .httr-oauth

diff --git a/DESCRIPTION b/DESCRIPTION
diff --git a/NAMESPACE b/NAMESPACE
diff --git a/R/data_description.R b/R/data_description.R
diff --git a/R/script_disease_relevant_tissues.R b/R/script_disease_relevant_tissues.R
diff --git a/R/script_integration_efficacy_scores.R b/R/script_integration_efficacy_scores.R
@@ -9,16 +9,23 @@
 #'using max and harmonic sum functions (see \insertRef{Failli2019}{ThETA} for details).
 #'@export
 #'@importFrom Rdpack reprompt
-integrate.scores <- function(data, col.scores = NULL){
-  if(is.null(col.scores) | length(col.scores) < 2)
-      stop("Please indicate, at least, two different column-scores.")
+integrate.scores <- function(data, col.scores = NULL) {
+  # `||` is the scalar, short-circuiting operator intended for `if` conditions.
+  if (is.null(col.scores) || length(col.scores) < 2)
+    stop("Please indicate, at least, two different column-scores.")
+  # Missing values are replaced by zero before the scores are combined.
   data[is.na(data)] <- 0
   print(dim(data))
   data$MAX <- rep(0, nrow(data))
   data$HS <- rep(0, nrow(data))
-  for(i in 1:nrow(data)){
+  # seq_len() skips the loop for zero-row input, where 1:nrow(data) would not.
+  for (i in seq_len(nrow(data))) {
     data$MAX[i] <- max(data[i, col.scores])
-    data$HS[i] <- sum(sort(data[i,col.scores],decreasing=T)/(1:length(data[i,col.scores]))^2)
+    # Harmonic sum: the k-th largest score of the row is divided by k^2.
+    # The row is flattened first so that a plain vector is sorted.
+    row_scores <- unlist(data[i, col.scores], use.names = FALSE)
+    row_scores <- sort(row_scores, decreasing = TRUE)
+    data$HS[i] <- sum(row_scores / seq_along(row_scores)^2)
   }
   return(data)
 }

diff --git a/R/script_modulation_score.R b/R/script_modulation_score.R
@@ -20,71 +20,130 @@
 #'@importFrom reshape2 melt
 #'@importFrom data.table as.data.table
 #'@importFrom scales rescale
-modulation.score <- function(geneSets = NULL){
-  if(is.null(geneSets)){
-    diff_exp_gene_sets <- MIGSA::downloadEnrichrGeneSets(c('Disease_Perturbations_from_GEO_up',
-                                                            'Disease_Perturbations_from_GEO_down',
-                                                            'Single_Gene_Perturbations_from_GEO_up',
-                                                            'Single_Gene_Perturbations_from_GEO_down'))
+modulation.score <- function(geneSets = NULL) {
+  # With no gene sets supplied, download the four Enrichr GEO perturbation
+  # libraries and reshape them into named lists of geneIds/Description.
+  if (is.null(geneSets)) {
+    diff_exp_gene_sets <-
+      MIGSA::downloadEnrichrGeneSets(
+        c(
+          'Disease_Perturbations_from_GEO_up',
+          'Disease_Perturbations_from_GEO_down',
+          'Single_Gene_Perturbations_from_GEO_up',
+          'Single_Gene_Perturbations_from_GEO_down'
+        )
+      )
     geneSets <- list()
-    for(i in names(diff_exp_gene_sets)){
+    for (i in names(diff_exp_gene_sets)) {
       x <- diff_exp_gene_sets[[i]]
       geneSets[[i]] <- list()
-      is_disease<-grepl('Disease_Perturbations',i)
-      for(j in 1:length(x)) {
-        geneSets[[i]][[j]] <- list(geneIds=x[[j]]@geneIds,Description=x[[j]]@setName)
-        temp <- strsplit(geneSets[[i]][[j]]$Description,' ')[[1]]
-        idx <- length(temp)-4
-        if(is_disease) {
-          geneSets[[i]][[j]]$Description <- tolower(paste(temp[1:(idx-1)],collapse=' '))
+      is_disease <- grepl('Disease_Perturbations', i)
+      # seq_along() gives an empty loop when a library holds no gene sets.
+      for (j in seq_along(x)) {
+        geneSets[[i]][[j]] <-
+          list(geneIds = x[[j]]@geneIds,
+               Description = x[[j]]@setName)
+        # Split the set name on spaces; `idx` is the position of the fifth
+        # token from the end, and the last four tokens are never used.
+        temp <- strsplit(geneSets[[i]][[j]]$Description, ' ')[[1]]
+        idx <- length(temp) - 4
+        if (is_disease) {
+          geneSets[[i]][[j]]$Description <-
+            tolower(paste(temp[1:(idx - 1)], collapse = ' '))
           names(geneSets[[i]])[j] <- temp[idx]
         }
         else{
-          geneSets[[i]][[j]]$Description <- paste(temp[2:idx],collapse=' ')
+          geneSets[[i]][[j]]$Description <- paste(temp[2:idx], collapse = ' ')
           names(geneSets[[i]])[j] <- toupper(temp[1])
         }
-        if(is_disease){
-          names(geneSets[[i]])[grepl("^[0-9]+$",names(geneSets[[i]]))] <-
-            paste('DOID:',names(geneSets[[i]])[grepl("^[0-9]+$",names(geneSets[[i]]))],sep='')
-          names(geneSets[[i]])[grepl("^C[0-9]+$",names(geneSets[[i]]))] <-
-            paste('CUI:',names(geneSets[[i]])[grepl("^C[0-9]+$",names(geneSets[[i]]))],sep='')
-          names(geneSets[[i]])<-gsub('-',':',names(geneSets[[i]]))
+        if (is_disease) {
+          # All-digit names get a 'DOID:' prefix, C<digits> names get 'CUI:',
+          # and any '-' in a name is replaced by ':'.
+          names(geneSets[[i]])[grepl("^[0-9]+$", names(geneSets[[i]]))] <-
+            paste0('DOID:', names(geneSets[[i]])[grepl("^[0-9]+$", names(geneSets[[i]]))])
+          names(geneSets[[i]])[grepl("^C[0-9]+$", names(geneSets[[i]]))] <-
+            paste0('CUI:', names(geneSets[[i]])[grepl("^C[0-9]+$", names(geneSets[[i]]))])
+          names(geneSets[[i]]) <- gsub('-', ':', names(geneSets[[i]]))
         }
       }
     }
   }
-  prtrb_specular <- lapply(seq(1,4,by=2),function(i) geneSets[c(i,i+1)])
+  # Group the collections as (1, 2) and (3, 4) and reverse the second pair, so
+  # that collection 1 is compared with 4 and collection 2 with 3.
+  prtrb_specular <-
+    lapply(seq(1, 4, by = 2), function(i)
+      geneSets[c(i, i + 1)])
   prtrb_specular[[2]] <- rev(prtrb_specular[[2]])
-  occ.<-mapply(function(x,y) sapply(x,function(i)sapply(y,function(j) length(intersect(i$geneIds,j$geneIds)))),
-               prtrb_specular[[1]], prtrb_specular[[2]],SIMPLIFY=F)
-  names(occ.)<-c('up-down','down-up')
+  # Overlap counts: number of gene ids shared by every pair of gene sets.
+  occ. <-
+    mapply(
+      function(x, y)
+        sapply(x, function(i)
+          sapply(y, function(j)
+            length(intersect(i$geneIds, j$geneIds)))),
+      prtrb_specular[[1]],
+      prtrb_specular[[2]],
+      SIMPLIFY = FALSE
+    )
+  names(occ.) <- c('up-down', 'down-up')
   ### Calculating the composite z-scores for each disease-gene perturbation interaction
-  z1 <-lapply(occ.,function(x)t(apply(x,1,function(y)(y-mean(y))/sd(y))))
-  z2 <-lapply(occ.,function(x)apply(x,2,function(y)(y-mean(y))/sd(y)))
-  Z <- mapply('+',z1,z2,SIMPLIFY = F)
+  z1 <-
+    lapply(occ., function(x)
+      t(apply(x, 1, function(y)
+        (y - mean(y)) / sd(y))))
+  z2 <-
+    lapply(occ., function(x)
+      apply(x, 2, function(y)
+        (y - mean(y)) / sd(y)))
+  Z <- mapply('+', z1, z2, SIMPLIFY = FALSE)
   ### Composite score
-  Z[['both']] <- (Z[[1]]+Z[[2]])/2
+  Z[['both']] <- (Z[[1]] + Z[[2]]) / 2
   ### Removing indexes of perturbations whose gene signatures correspond to few human orthologs
-  len <- lapply(geneSets,function(x)sapply(x,function(y)length(y$geneIds)))
-  out <- sapply(len,function(x){q<-quantile(x);q[2]-1.5*(q[4]-q[2])})
-  idx <- mapply(function(x,i)x>i,len,out,SIMPLIFY=F)
-  idx <- lapply(seq(1,4,by=2),function(i)do.call('&',idx[c(i,i+1)]))
-  mat <- Z$both[idx[[2]],idx[[1]]]
+  len <-
+    lapply(geneSets, function(x)
+      sapply(x, function(y)
+        length(y$geneIds)))
+  # Lower fence per collection: Q1 - 1.5 * (Q3 - Q1) of the gene set sizes.
+  out <-
+    sapply(len, function(x) {
+      q <- quantile(x)
+      q[2] - 1.5 * (q[4] - q[2])
+    })
+  idx <- mapply(function(x, i)
+    x > i, len, out, SIMPLIFY = FALSE)
+  idx <- lapply(seq(1, 4, by = 2), function(i)
+    do.call('&', idx[c(i, i + 1)]))
+  mat <- Z$both[idx[[2]], idx[[1]]]
   ### Aggregating target-disease pairs by max score
   df <- reshape2::melt(mat, value.name = "modscore")
-  colnames(df)[1:2] <- c('target.id','disease.id')
-  annotation <- expand.grid(target.modulationType=unlist(lapply(geneSets[[3]][idx[[2]]],'[',2),use.names = F),
-                            disease.name=unlist(lapply(geneSets[[1]][idx[[1]]],'[',2),use.names = F),KEEP.OUT.ATTRS = F)
-  df <- cbind(df,annotation)
+  colnames(df)[1:2] <- c('target.id', 'disease.id')
+  annotation <-
+    expand.grid(
+      target.modulationType = unlist(lapply(geneSets[[3]][idx[[2]]], '[', 2), use.names = FALSE),
+      disease.name = unlist(lapply(geneSets[[1]][idx[[1]]], '[', 2), use.names = FALSE),
+      KEEP.OUT.ATTRS = FALSE
+    )
+  df <- cbind(df, annotation)
   dt <- data.table::as.data.table(df)
-  dt <- dt[, .SD[which.max(modscore)], by=list(target.id,disease.id)]
+  # `.SD` stays unqualified: data.table binds it per group while evaluating
+  # `j`, whereas the exported `data.table::.SD` is only a NULL placeholder.
+  dt <-
+    dt[, .SD[which.max(modscore)], by = list(target.id, disease.id)]
   ### Rescaling scores to [0-1] | Setting outliers above Q3 + 1.5 IQR equal to 1
   q <- quantile(dt$modscore)
-  outliers <- q[4]+(1.5*(q[4]-q[2]))
-  dt$modscore <- scales::rescale(dt$modscore,from=c(min(dt$modscore),outliers),to=c(0,1))
-  dt$modscore[dt$modscore>1] <- 1
+  outliers <- q[4] + (1.5 * (q[4] - q[2]))
+  dt$modscore <-
+    scales::rescale(dt$modscore,
+                    from = c(min(dt$modscore), outliers),
+                    to = c(0, 1))
+  dt$modscore[dt$modscore > 1] <- 1
   ### Generating final data frame
   df <- as.data.frame(dt)
-  df[] <- lapply(df, function(x) if(is.factor(x)) as.character(x) else x)
+  df[] <-
+    lapply(df, function(x)
+      if (is.factor(x))
+        as.character(x)
+      else
+        x)
   return(df)
 }