Skip to content

Commit 0773972

Browse files
ginbergschuemieAdmin_mschuemi
authored
Release v3.11.0
* fix setting of continuous covariates in case of multiple settings and add a test * update test * Optimizing tidyCovariates for large data. Fixes #308 (#309) Co-authored-by: ginberg <ginberg@gmail.com> Co-authored-by: Admin_mschuemi <Admin_mschuemi@its.jnj.com> * cran submission file * add params to getDbCovariateData (#312) * #292 add function to replace cohort schema and table * #292 add params * add test * doc * v3.11.0 * update news --------- Co-authored-by: Martijn Schuemie <schuemie@ohdsi.org> Co-authored-by: Admin_mschuemi <Admin_mschuemi@its.jnj.com>
1 parent d514584 commit 0773972

67 files changed

Lines changed: 390 additions & 88 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CRAN-SUBMISSION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
Version: 3.8.0
2-
Date: 2025-03-19 15:22:25 UTC
3-
SHA: c0961a155c6fba22f3b5e4825b599f2410ed529b
1+
Version: 3.11.0
2+
Date: 2025-09-01 15:11:02 UTC
3+
SHA: 784c4a8dcda6fc0301c8b0823efdfccb1291745e

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: FeatureExtraction
22
Type: Package
33
Title: Generating Features for a Cohort
4-
Version: 3.10.0
5-
Date: 2025-05-08
4+
Version: 3.11.0
5+
Date: 2025-09-01
66
Authors@R: c(
77
person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")),
88
person("Marc", "Suchard", role = c("aut")),

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
FeatureExtraction 3.11.0
2+
=======================
3+
4+
- Improve tidyCovariates performance when using Andromeda version >= 1.0.0 (#308)
5+
- Fix error in merging covariatesContinuous when getDbCovariateData is called with multiple covariate settings (#306)
6+
- Add arguments to getDbCovariateData to support a custom covariate cohort schema/table (#292)
7+
18
FeatureExtraction 3.10.0
29
=======================
310

R/GetCovariates.R

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@
6969
#' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support
7070
#' temp tables. To emulate temp tables, provide a schema with write
7171
#' privileges where temp tables can be created.
72+
#' @param covariateCohortDatabaseSchema The database schema where the cohorts used to define the covariates can be found.
73+
#' @param covariateCohortTable The table where the cohorts used to define the covariates can be found.
7274
#'
7375
#' @return
7476
#' Returns an object of type \code{covariateData}, containing information on the covariates.
@@ -113,7 +115,9 @@ getDbCovariateData <- function(connectionDetails = NULL,
113115
covariateSettings,
114116
aggregated = FALSE,
115117
minCharacterizationMean = 0,
116-
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) {
118+
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
119+
covariateCohortDatabaseSchema = NULL,
120+
covariateCohortTable = NULL) {
117121
if (is.null(connectionDetails) && is.null(connection)) {
118122
stop("Need to provide either connectionDetails or connection")
119123
}
@@ -181,6 +185,13 @@ getDbCovariateData <- function(connectionDetails = NULL,
181185
hasData <- function(data) {
182186
return(!is.null(data) && (data %>% count() %>% pull()) > 0)
183187
}
188+
if (!is.null(covariateCohortDatabaseSchema) && !is.null(covariateCohortTable)) {
189+
covariateSettings <- replaceCovariateSettingsCohortSchemaTable(
190+
covariateSettings,
191+
covariateCohortDatabaseSchema,
192+
covariateCohortTable
193+
)
194+
}
184195
for (i in 1:length(covariateSettings)) {
185196
fun <- attr(covariateSettings[[i]], "fun")
186197
args <- list(
@@ -209,10 +220,11 @@ getDbCovariateData <- function(connectionDetails = NULL,
209220
if (hasData(covariateData$covariatesContinuous)) {
210221
if (hasData(tempCovariateData$covariatesContinuous)) {
211222
Andromeda::appendToTable(covariateData$covariatesContinuous, tempCovariateData$covariatesContinuous)
212-
} else if (hasData(tempCovariateData$covariatesContinuous)) {
213-
covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous
214223
}
224+
} else if (hasData(tempCovariateData$covariatesContinuous)) {
225+
covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous
215226
}
227+
216228
Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef)
217229
Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef)
218230
for (name in names(attr(tempCovariateData, "metaData"))) {

R/GetCovariatesFromOtherCohorts.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,3 +325,42 @@ warnIfPredefined <- function(analysisId, temporal = FALSE) {
325325
warning(sprintf("Analysis ID %d also used for prespecified analysis '%s'.", analysisId, preSpecAnalysis$analysisName))
326326
}
327327
}
328+
329+
#' Utility function to set the cohort table & schema on createCohortBasedCovariateSettings
330+
#' objects, overwriting their stored values with the schema and table supplied here.
331+
#'
332+
#' @param covariateSettings A non-empty list: either a single \code{covariateSettings} object (named list) or an unnamed list of such objects.
333+
#' @param covariateCohortDatabaseSchema The database schema where the cohorts used to define the covariates can be found.
334+
#' @param covariateCohortTable The table where the cohorts used to define the covariates can be found.
335+
#'
336+
#' @return
337+
#' The input settings, with schema and table replaced on every element whose "fun" attribute is "getDbCohortBasedCovariatesData"; other elements are returned unchanged.
338+
#'
339+
replaceCovariateSettingsCohortSchemaTable <- function(covariateSettings,
340+
covariateCohortDatabaseSchema,
341+
covariateCohortTable) {
342+
errorMessages <- checkmate::makeAssertCollection()
343+
checkmate::assertList(covariateSettings, min.len = 1, add = errorMessages)
344+
checkmate::assertCharacter(covariateCohortDatabaseSchema, add = errorMessages) # NOTE(review): accepts character vectors of any length; presumably a single string is intended - confirm
345+
checkmate::assertCharacter(covariateCohortTable, add = errorMessages)
346+
checkmate::reportAssertions(collection = errorMessages)
347+
348+
replaceProperties <- function(s) {
349+
if (inherits(s, "covariateSettings") && "fun" %in% names(attributes(s))) {
350+
if (attr(s, "fun") == "getDbCohortBasedCovariatesData") {
351+
# Only cohort-based settings are touched: overwrite their schema and table fields
352+
s$covariateCohortDatabaseSchema <- covariateCohortDatabaseSchema
353+
s$covariateCohortTable <- covariateCohortTable
354+
}
355+
}
356+
return(s)
357+
}
358+
if (is.null(names(covariateSettings))) {
359+
# Unnamed list: treat every element as a separate settings object
360+
modifiedCovariateSettings <- lapply(covariateSettings, replaceProperties)
361+
} else {
362+
# Named list: treat the input itself as a single settings object
363+
modifiedCovariateSettings <- replaceProperties(covariateSettings)
364+
}
365+
return(modifiedCovariateSettings)
366+
}

R/Normalization.R

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,19 +179,26 @@ tidyCovariateData <- function(covariateData,
179179
deleteCovariateIds <- c(deleteCovariateIds, toDelete$covariateId)
180180
ParallelLogger::logInfo("Removing ", nrow(toDelete), " infrequent covariates")
181181
}
182-
if (length(deleteCovariateIds) > 0) {
183-
newCovariates <- newCovariates %>%
184-
filter(!.data$covariateId %in% deleteCovariateIds)
185-
}
186182

183+
# When performing both filtering by covariate IDs and normalization, it is *much* faster
184+
# to apply the filtering to the maxValuePerCovariateId table, and let the inner join
185+
# apply the filtering to the covariate table (instead of filtering the covariate table
186+
# directly).
187187
if (normalize) {
188188
ParallelLogger::logInfo("Normalizing covariates")
189+
if (length(deleteCovariateIds) > 0) {
190+
covariateData$maxValuePerCovariateId <- covariateData$maxValuePerCovariateId %>%
191+
filter(!.data$covariateId %in% deleteCovariateIds)
192+
}
189193
newCovariates <- newCovariates %>%
190194
inner_join(covariateData$maxValuePerCovariateId, by = "covariateId") %>%
191195
mutate(covariateValue = .data$covariateValue / .data$maxValue) %>%
192196
select(-.data$maxValue)
193197
metaData$normFactors <- covariateData$maxValuePerCovariateId %>%
194198
collect()
199+
} else if (length(deleteCovariateIds) > 0) {
200+
newCovariates <- newCovariates %>%
201+
filter(!.data$covariateId %in% deleteCovariateIds)
195202
}
196203
newCovariateData$covariates <- newCovariates
197204
if (!is.null(covariateData$timeRef)) {

docs/404.html

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/CreatingCovariatesBasedOnOtherCohorts.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/CreatingCovariatesUsingCohortAttributes.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/CreatingCustomCovariateBuilders.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)