diff --git a/.gitignore b/.gitignore index b91377b..44b3f8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ demo-strategus-cohort-incidence/* +demo-strategus-cohort-methods/* conda-pip-env-github-workflow.yml *sandbox* *.doit.db* @@ -9,6 +10,12 @@ docs/study-agent-development-phenotype-suggestion-tool-architecture-plan.pdf demo/417_Acute_gastrointestinal_bleeding_events.json demo/299_Acute_gastrointestinal_bleeding_or_perforation_events.json demo/1197_Acute_gastrointestinal_bleeding.json +test*.R +!R/OHDSIAssistant/tests/testthat.R +!R/OHDSIAssistant/tests/testthat/test-step-by-step-analytic-settings.R +.obsidian/ +.DS_Store +.codex # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/CODING_AGENT_README.md b/CODING_AGENT_README.md index 9f596c3..1d3385d 100644 --- a/CODING_AGENT_README.md +++ b/CODING_AGENT_README.md @@ -60,6 +60,7 @@ The currently exposed ACP flow endpoints are: - `phenotype_recommendation_advice` - `phenotype_improvements` - `phenotype_intent_split` +- `cohort_methods_intent_split` - `concept_sets_review` - `cohort_critique_general_design` - `keeper_concept_sets_generate` diff --git a/CohortMethod_HANDOFF.md b/CohortMethod_HANDOFF.md new file mode 100644 index 0000000..e2dadae --- /dev/null +++ b/CohortMethod_HANDOFF.md @@ -0,0 +1,76 @@ + +## What Was Implemented + +We added the current CohortMethod shell path around +`OHDSIAssistant::runStrategusCohortMethodsShell()`. In broad terms, the work now +supports: + +- CohortMethod-specific intent splitting from one study-intent sentence into + target, comparator, and one or more outcome statements. + - We kept the existing `phenotype_intent_split` flow unchanged and added a + separate `cohort_methods_intent_split` flow. Please decide whether these + should remain separate or be consolidated later. +- Phenotype recommendation and optional improvement for target, comparator, and + outcome cohorts. +- Negative-control and covariate concept-set selection remain future work. 
This + step currently implements only the scaffold; it should be completed once ACP + flows for suggesting negative controls and covariate concept sets are + available. +- CohortMethod analytic-settings collection through either `step_by_step` prompts + or `free_text` ACP recommendation. +- Generation of reproducible output artifacts, CohortMethod analytic-settings + JSON, and Strategus R scripts including `scripts/06_cm_spec.R`. + +### Main ACP/MCP/core additions: + +- ACP flow endpoints: + - `/flows/cohort_methods_intent_split` + - `/flows/cohort_methods_specifications_recommendation` +- MCP tools: + - `cohort_methods_intent_split` + - `cohort_methods_prompt_bundle` +- R helper: `OHDSIAssistant::suggestCohortMethodSpecs()` +- R shell integration: `OHDSIAssistant::runStrategusCohortMethodsShell()` +- Prompt/template assets: + - CohortMethod intent-split overview/spec/schema assets. + - CohortMethod cmAnalysis template and instruction assets under + `mcp_server/prompts/cohort_methods/`. 
+ +## Read These For Details + +- Shell workflow, output layout, generated scripts, current boundaries, and + analytic-settings prompt details: + - `docs/STRATEGUS_COHORT_METHODS_SHELL.md` +- End-to-end workflow diagrams: + - `docs/COHORT_METHODS_WORKFLOW.md` +- Free-text analytic-settings recommendation flow, endpoint shape, response + shape, and projection from cmAnalysis-style specs: + - `docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md` +- CohortMethod ACP/MCP service registry entries: + - `docs/SERVICE_REGISTRY.yaml` +- CohortMethod cmAnalysis prompt/template assets: + - `mcp_server/prompts/cohort_methods/cmAnalysis_template.json` + - `mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md` + - `mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md` + - `mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md` +- CohortMethod intent-split prompt/schema assets: + - `mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md` + - `mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md` + - `mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json` +- R usage examples: + - `R/OHDSIAssistant/README.md` +- Test and smoke-test commands: + - `docs/TESTING.md` + +## Remaining TODO + + +Future CohortMethod coverage: + +- Replace dummy negative-control and covariate concept-set artifacts with real + ACP/MCP-backed workflows. + - The current placeholder path cannot exclude high-correlation covariates, which can cause `scripts/06_cm_spec.R` to fail when `errorOnHighCorrelation` is enabled. +- Properly implement `scripts/04_keeper_review.R` for CohortMethod outputs. +- Support multiple CohortMethod analyses. +- Add validation for cohort-method analytic-settings recommendations before + they are accepted into the shell/generated scripts. 
diff --git a/R/OHDSIAssistant/NAMESPACE b/R/OHDSIAssistant/NAMESPACE index b9f6b07..c87e423 100644 --- a/R/OHDSIAssistant/NAMESPACE +++ b/R/OHDSIAssistant/NAMESPACE @@ -14,3 +14,5 @@ export(readStrategusDbDetails) export(createStrategusConnectionDetails) export(readStrategusExecutionSettings) export(createStrategusExecutionSettings) +export(runStrategusCohortMethodsShell) +export(suggestCohortMethodSpecs) diff --git a/R/OHDSIAssistant/R/cohort_methods_workflow.R b/R/OHDSIAssistant/R/cohort_methods_workflow.R new file mode 100644 index 0000000..745a65e --- /dev/null +++ b/R/OHDSIAssistant/R/cohort_methods_workflow.R @@ -0,0 +1,210 @@ +#' Suggest cohort method study specifications from a free-text description. +#' +#' Calls the ACP flow `/flows/cohort_methods_specifications_recommendation` +#' and returns the cohort-methods recommendation, full analysis spec for +#' traceability, and per-section rationales. Falls back to a local stub +#' when `acp_state$url` is NULL. +#' +#' @param studyIntent protocol context string +#' @param analyticSettingsDescription free-text description of the study design +#' @param interactive when TRUE, prints a section summary (default: TRUE) +#' @return list response from ACP flow or local stub +#' @export +suggestCohortMethodSpecs <- function(studyIntent, + analyticSettingsDescription, + interactive = TRUE) { + if (is.null(studyIntent) || !nzchar(trimws(studyIntent))) { + stop("Provide a non-empty studyIntent.") + } + if (is.null(analyticSettingsDescription) || !nzchar(trimws(analyticSettingsDescription))) { + stop("Provide a non-empty analyticSettingsDescription.") + } + + body <- list( + study_intent = trimws(as.character(studyIntent)), + study_description = trimws(as.character(analyticSettingsDescription)), + analytic_settings_description = trimws(as.character(analyticSettingsDescription)) + ) + + res <- if (!is.null(acp_state$url)) { + .acp_post("/flows/cohort_methods_specifications_recommendation", body) + } else { + 
local_cohort_method_specs(body) + } + + if (isTRUE(interactive)) { + cat("\n== Cohort Method Specifications ==\n") + cat("Status:", res$status %||% "(missing)", "\n") + rec <- res$recommendation %||% list() + if (length(rec) > 0) { + cat("Profile:", rec$profile_name %||% "(none)", "\n") + cat("Recommendation status:", rec$status %||% "(none)", "\n") + } + rats <- res$section_rationales %||% list() + if (length(rats) > 0) { + for (section in names(rats)) { + entry <- rats[[section]] + cat(sprintf(" - %s: confidence=%s %s\n", + section, + entry$confidence %||% "?", + entry$rationale %||% "")) + } + } + failed <- res$diagnostics$failed_sections %||% list() + if (length(failed) > 0) { + cat("Backfilled sections:", paste(unlist(failed), collapse = ", "), "\n") + } + .studyAgentPrintCohortMethodSpecsSummary(rec) + } + invisible(res) +} + +.studyAgentCmSpecIsPresent <- function(value) { + if (is.null(value) || length(value) == 0) return(FALSE) + if (length(value) == 1 && is.atomic(value) && is.na(value)) return(FALSE) + TRUE +} + +.studyAgentCmSpecValue <- function(value, path = NULL) { # Render one setting as display text; `path` selects the label map and unmapped values pass through unchanged + `%||%` <- function(x, y) if (is.null(x)) y else x + if (!.studyAgentCmSpecIsPresent(value)) return("") + if (is.character(value) && length(value) == 1 && !nzchar(trimws(value))) return("") + if (is.logical(value) && length(value) == 1) return(if (isTRUE(value)) "Yes" else "No") + if (is.character(value) && length(value) == 1) { + mapped <- switch( # `[value]` returns NA for an unmapped value where `[[value]]` would raise 'subscript out of bounds' + path %||% "", + "startAnchor" = c("cohort start" = "cohort start date", "cohort end" = "cohort end date")[value], + "endAnchor" = c("cohort start" = "cohort start date", "cohort end" = "cohort end date")[value], + "ps_strategy" = c("match_on_ps" = "Match on propensity score", "stratify_by_ps" = "Stratify on propensity score", "none" = "None")[value], + "caliperScale" = c("propensity score" = "Propensity score", "standardized" = "Standardized", "standardized logit" = "Standardized logit")[value], + "modelType" = c("cox" = "Cox 
proportional hazards", "poisson" = "Poisson regression", "logistic" = "Logistic regression")[value], + "removeDuplicateSubjects" = c( + "keep all" = "Keep All", + "keep first" = "Keep First", + "remove all" = "Remove All", + "keep first, truncate to second" = "Keep First, Truncate to Second" + )[value], + NULL + ) + if (!is.null(mapped) && length(mapped) == 1 && !is.na(mapped)) return(unname(mapped)) # unname() keeps the plain string the `[[` lookup used to return + return(value) + } + if (is.numeric(value) && length(value) == 1) { + return(trimws(formatC(as.numeric(value), format = "fg", digits = 10))) + } + paste(as.character(value), collapse = ", ") +} + +.studyAgentCmSpecRegularized <- function(args) { + if (!is.list(args)) return(FALSE) + .studyAgentCmSpecIsPresent(args$prior) +} + +.studyAgentCmSpecPsStrategy <- function(ps) { + if (!is.list(ps)) return("none") + if (.studyAgentCmSpecIsPresent(ps$matchOnPsArgs)) return("match_on_ps") + if (.studyAgentCmSpecIsPresent(ps$stratifyByPsArgs)) return("stratify_by_ps") + "none" +} + +.studyAgentCmSpecTrimming <- function(trim_args) { + if (!is.list(trim_args) || length(trim_args) == 0) return("None") + if (.studyAgentCmSpecIsPresent(trim_args$equipoiseBounds)) return("By equipoise") + if (.studyAgentCmSpecIsPresent(trim_args$trimFraction)) { + fraction <- suppressWarnings(as.numeric(trim_args$trimFraction)) + if (length(fraction) == 1 && !is.na(fraction) && fraction > 0) { + return(sprintf("By percent (%s%%)", trimws(formatC(fraction * 100, format = "fg", digits = 6)))) + } + } + "None" +} + +.studyAgentCmSpecPrintSection <- function(title, rows) { + cat(sprintf("[%s]\n", title)) + for (row in rows) { + cat(sprintf(" - %s: %s\n", row[[1]], row[[2]])) + } +} + +.studyAgentPrintCohortMethodSpecsSummary <- function(recommendation) { + if (!is.list(recommendation)) return(invisible(NULL)) + study_population <- recommendation$study_population %||% list() + time_at_risk <- recommendation$time_at_risk %||% list() + ps <- recommendation$propensity_score_adjustment %||% list() + outcome <- 
recommendation$outcome_model %||% list() + cohort_method_data <- study_population$cohortMethodDataArgs %||% list() + if (length(study_population) == 0 && length(time_at_risk) == 0 && length(ps) == 0 && length(outcome) == 0) { + return(invisible(NULL)) + } + + cat("\n") + .studyAgentCmSpecPrintSection("Study Population", list( + list("Study start date", .studyAgentCmSpecValue(cohort_method_data$studyStartDate)), + list("Study end date", .studyAgentCmSpecValue(cohort_method_data$studyEndDate)), + list("Restrict to common period", .studyAgentCmSpecValue(cohort_method_data$restrictToCommonPeriod)), + list("First exposure only", .studyAgentCmSpecValue(cohort_method_data$firstExposureOnly)), + list("Washout period", .studyAgentCmSpecValue(cohort_method_data$washoutPeriod)), + list("Remove duplicate subjects", .studyAgentCmSpecValue(cohort_method_data$removeDuplicateSubjects, "removeDuplicateSubjects")), + list("Censor at new risk window", .studyAgentCmSpecValue(study_population$censorAtNewRiskWindow)), + list("Remove prior outcomes", .studyAgentCmSpecValue(study_population$removeSubjectsWithPriorOutcome)), + list("Prior outcome lookback", .studyAgentCmSpecValue(study_population$priorOutcomeLookback)), + list("Maximum cohort size", .studyAgentCmSpecValue(cohort_method_data$maxCohortSize)) + )) + .studyAgentCmSpecPrintSection("Time At Risk", list( + list("Minimum days at risk", .studyAgentCmSpecValue(study_population$minDaysAtRisk)), + list("Risk window start", .studyAgentCmSpecValue(time_at_risk$riskWindowStart)), + list("Risk window start anchor", .studyAgentCmSpecValue(time_at_risk$startAnchor, "startAnchor")), + list("Risk window end", .studyAgentCmSpecValue(time_at_risk$riskWindowEnd)), + list("Risk window end anchor", .studyAgentCmSpecValue(time_at_risk$endAnchor, "endAnchor")) + )) + .studyAgentCmSpecPrintSection("Propensity Score Adjustment", list( + list("PS trimming", .studyAgentCmSpecTrimming(ps$trimByPsArgs)), + list("PS adjustment strategy", 
.studyAgentCmSpecValue(.studyAgentCmSpecPsStrategy(ps), "ps_strategy")), + list("Max cohort size for PS fitting", .studyAgentCmSpecValue(ps$createPsArgs$maxCohortSizeForFitting)), + list("Test covariate correlation", .studyAgentCmSpecValue(ps$createPsArgs$errorOnHighCorrelation)), + list("Use regularization", .studyAgentCmSpecValue(.studyAgentCmSpecRegularized(ps$createPsArgs))), + list("Maximum match ratio", .studyAgentCmSpecValue(ps$matchOnPsArgs$maxRatio)), + list("Matching caliper", .studyAgentCmSpecValue(ps$matchOnPsArgs$caliper)), + list("Caliper scale", .studyAgentCmSpecValue(ps$matchOnPsArgs$caliperScale, "caliperScale")) + )) + .studyAgentCmSpecPrintSection("Outcome Model", list( + list("Outcome model", .studyAgentCmSpecValue(outcome$modelType, "modelType")), + list("Condition on strata", .studyAgentCmSpecValue(outcome$stratified)), + list("Use covariates in outcome model", .studyAgentCmSpecValue(outcome$useCovariates)), + list("Use IPTW", .studyAgentCmSpecValue(outcome$inversePtWeighting)), + list("Use regularization", .studyAgentCmSpecValue(.studyAgentCmSpecRegularized(outcome))) + )) + invisible(NULL) +} + +local_cohort_method_specs <- function(body) { + list( + source = "stub_no_acp", + status = "stub", + recommendation = list( + mode = "free_text", + input_method = "typed_text", + source = "local_stub_no_acp", + status = "stub", + profile_name = "Recommended from free-text description (stub)", + raw_description = body$analytic_settings_description %||% "", + study_population = list(), + time_at_risk = list(), + propensity_score_adjustment = list(), + outcome_model = list(), + deferred_inputs = list( + function_argument_description = "implemented", + description_file_path = "implemented", + interactive_typed_description = "implemented" + ), + defaults_snapshot = list() + ), + cohort_methods_specifications = list(), + section_rationales = list(), + diagnostics = list( + source = "local_stub_no_acp", + reason = "acp_state$url is NULL; call 
acp_connect(url) first." + ), + request = body + ) +} diff --git a/R/OHDSIAssistant/R/execution_settings.R b/R/OHDSIAssistant/R/execution_settings.R index d4c813b..4dabae8 100644 --- a/R/OHDSIAssistant/R/execution_settings.R +++ b/R/OHDSIAssistant/R/execution_settings.R @@ -26,6 +26,9 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu workFolder <- cfg$workFolder resultsFolder <- cfg$resultsFolder cohortIdFieldName <- cfg$cohortIdFieldName %||% "cohort_definition_id" + maxCores <- cfg$maxCores %||% parallel::detectCores() + maxCores <- suppressWarnings(as.integer(maxCores)[1]) + if (is.na(maxCores) || maxCores < 1L) maxCores <- 1L if (!nzchar(cdmDatabaseSchema)) stop("cdmDatabaseSchema must be provided in strategus-execution-settings.json") if (!nzchar(workDatabaseSchema)) stop("workDatabaseSchema must be provided in strategus-execution-settings.json") @@ -38,8 +41,10 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu executionSettings <- createCdmExecutionSettings( cdmDatabaseSchema = cdmDatabaseSchema, workDatabaseSchema = workDatabaseSchema, + cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable), workFolder = workFolder, - resultsFolder = resultsFolder + resultsFolder = resultsFolder, + maxCores = maxCores ) list( @@ -51,6 +56,7 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu cohortTable = cohortTable, workFolder = workFolder, resultsFolder = resultsFolder, + maxCores = maxCores, cohortIdFieldName = cohortIdFieldName ) } diff --git a/R/OHDSIAssistant/R/lintStudyDesign.R b/R/OHDSIAssistant/R/lintStudyDesign.R index 00213a5..c4315ce 100644 --- a/R/OHDSIAssistant/R/lintStudyDesign.R +++ b/R/OHDSIAssistant/R/lintStudyDesign.R @@ -26,22 +26,23 @@ lintStudyDesign <- function( if ("concept-sets-review" %in% lintTasks) { res <- if (use_acp) { - .acp_post("/tools/propose_concept_set_diff", list( - conceptSetRef = conceptSetRef, - studyIntent 
= paste(readLines(studyProtocol, warn = FALSE), collapse=" ") +.acp_post("/flows/concept_sets_review", list( + concept_set_path = conceptSetRef, + study_intent = paste(readLines(studyProtocol, warn = FALSE), collapse=" ") )) } else { local_concept_sets_review(conceptSetRef, studyIntent = paste(readLines(studyProtocol, warn = FALSE), collapse=" ")) } res$artifact <- conceptSetRef + core <- res$full_result %||% res # optional actions handling - if (handleActions && use_acp && length(res$actions %||% list())) { - prev <- applyLLMActionsConceptSet(conceptSetRef, res$actions, preview = TRUE) + if (handleActions && use_acp && length(core$actions %||% list())) { + prev <- applyLLMActionsConceptSet(conceptSetRef, core$actions, preview = TRUE) res$action_preview <- prev if (applyActions) { res$action_apply <- applyLLMActionsConceptSet( conceptSetRef, - res$actions, + core$actions, preview = FALSE, overwrite = overwriteActions, backup = backupActions @@ -51,8 +52,8 @@ lintStudyDesign <- function( if (interactive) { cat("\n== Concept Sets Review ==\n") cat(sprintf("File: %s\n", conceptSetRef)) - cat(res$plan, "\n") - print_findings(res$findings) + cat(core$plan %||% "", "\n") + print_findings(core$findings) if (handleActions && !is.null(res$action_preview)) { cat(sprintf("Action preview: %s changes, %s ignored\n", res$action_preview$counts$changed %||% 0, @@ -67,16 +68,17 @@ lintStudyDesign <- function( if ("cohort-critique-general-design" %in% lintTasks) { res <- if (use_acp) { - .acp_post("/tools/cohort_lint", list(cohortRef = cohortRef)) +.acp_post("/flows/cohort_critique_general_design", list(cohort_path = cohortRef)) } else { local_cohort_critique_general(cohortRef) } res$artifact <- cohortRef + core <- res$full_result %||% res if (interactive) { cat("\n== Cohort Critique: General Design ==\n") cat(sprintf("File: %s\n", cohortRef)) - cat(res$plan, "\n") - print_findings(res$findings) + cat(core$plan %||% "", "\n") + print_findings(core$findings) } 
results$`cohort-critique-general-design` <- res } diff --git a/R/OHDSIAssistant/R/phenotype_workflow.R b/R/OHDSIAssistant/R/phenotype_workflow.R index 9bdff85..be083e7 100644 --- a/R/OHDSIAssistant/R/phenotype_workflow.R +++ b/R/OHDSIAssistant/R/phenotype_workflow.R @@ -9,8 +9,8 @@ suggestPhenotypes <- function(protocolPath = NULL, studyIntent = NULL, topK = 20, - maxResults = 10, - candidateLimit = NULL, + maxResults = 3, + candidateLimit = 10, interactive = TRUE) { if (!is.null(protocolPath)) { protocolPath <- normalizePath(protocolPath, winslash = "/", mustWork = FALSE) @@ -45,17 +45,23 @@ suggestPhenotypes <- function(protocolPath = NULL, res$artifact <- list(protocolRef = protocolPath) core <- res$recommendations %||% res if (interactive) { - cat("\n== Phenotype Suggestions ==\n") - cat(core$plan %||% "", "\n") - if (!is.null(core$mode)) cat(sprintf("Mode: %s\n", core$mode)) + cat(" +== Phenotype Suggestions == +") + cat(core$plan %||% "", " +") + if (!is.null(core$mode)) cat(sprintf("Mode: %s +", core$mode)) recs <- core$phenotype_recommendations %||% list() if (length(recs) == 0) { - cat(" [stub] No recommendations (LLM not connected or no matches).\n") + cat(" [stub] No recommendations (LLM not connected or no matches). 
+") } else { for (r in recs) { - cat(sprintf(" - %s (%s): %s\n", - r$cohortName %||% "", - r$cohortId %||% "?", + cat(sprintf(" - %s (%s): %s +", + r$phenotype_name %||% "", + r$phenotype_id %||% "?", r$justification %||% "")) } } @@ -64,31 +70,62 @@ suggestPhenotypes <- function(protocolPath = NULL, } #' Pull phenotype definitions to a local folder -#' @param cohortIds integer vector of cohortIds +#' @param cohortIds character vector of ACP phenotype ids, typically selected from suggestPhenotypes() #' @param outputDir directory to write JSON definitions #' @param overwrite logical; if FALSE, auto-version the filename #' @return character vector of written file paths pullPhenotypeDefinitions <- function(cohortIds, outputDir = ".", overwrite = FALSE) { + phenotype_ids <- as.character(cohortIds %||% character(0)) + if (length(phenotype_ids) == 0) return(character(0)) + + unsupported <- phenotype_ids[!grepl("^ohdsi:", phenotype_ids)] + if (length(unsupported) > 0) { + stop( + sprintf( + paste0( + "pullPhenotypeDefinitions() currently supports OHDSI phenotype ids only. ", + "Conversion of non-OHDSI phenotypes to computable OHDSI cohort definitions is not implemented yet. 
", + "Unsupported ids: %s" + ), + paste(unique(unsupported), collapse = ", ") + ) + ) + } + + index_dir <- Sys.getenv("PHENOTYPE_INDEX_DIR", "data/phenotype_index") + index_dir <- normalizePath(index_dir, winslash = "/", mustWork = FALSE) + index_def_dir <- file.path(index_dir, "definitions") + if (!dir.exists(index_def_dir)) { + stop(sprintf("Missing phenotype index definitions folder: %s", index_def_dir)) + } + outputDir <- normalizePath(outputDir, winslash = "/", mustWork = FALSE) if (!dir.exists(outputDir)) dir.create(outputDir, recursive = TRUE) - cds <- PhenotypeLibrary::getPlCohortDefinitionSet(as.integer(cohortIds)) + + definition_path <- function(phenotype_id) { + file.path(index_def_dir, sprintf("%s.json", gsub(":", "__", phenotype_id, fixed = TRUE))) + } + written <- character(0) - for (i in seq_len(nrow(cds))) { - nm <- cds$cohortName[i] %||% "" - safe <- gsub("[^A-Za-z0-9_-]+", "_", nm) - if (identical(safe, "") || is.na(safe)) safe <- paste0("cohort_", cds$cohortId[i]) - fname_base <- file.path(outputDir, sprintf("%s_%s.json", cds$cohortId[i], safe)) - target <- fname_base + for (phenotype_id in phenotype_ids) { + src <- definition_path(phenotype_id) + if (!file.exists(src)) { + stop(sprintf("Phenotype JSON not found: %s", src)) + } + + safe <- gsub("[^A-Za-z0-9_-]+", "_", phenotype_id) + target <- file.path(outputDir, sprintf("%s.json", safe)) if (!overwrite) { idx <- 1 while (file.exists(target)) { - target <- file.path(outputDir, sprintf("%s_%s-v%d.json", cds$cohortId[i], safe, idx)) + target <- file.path(outputDir, sprintf("%s-v%d.json", safe, idx)) idx <- idx + 1 } } - writeLines(cds$json[i], con = target, useBytes = TRUE) + + if (!file.copy(src, target, overwrite = TRUE)) stop(sprintf("Failed to copy phenotype JSON to: %s", target)) # file.copy() signals failure only through its FALSE return value written <- c(written, target) } written @@ -113,27 +150,39 @@ reviewPhenotypes <- function(protocolPath, if (!is.null(characterizationPaths)) { characterizationPaths <- unname(vapply(characterizationPaths, normalizePath, character(1), winslash = "/", mustWork = FALSE)) } + body <- 
list( - protocolRef = protocolPath, - cohortRefs = as.list(cohortJsonPaths), - characterizationRefs = as.list(characterizationPaths %||% list()) + protocol_path = protocolPath, + cohort_paths = as.list(cohortJsonPaths) ) + if (!is.null(characterizationPaths) && length(characterizationPaths) > 0) { + warning("characterizationPaths are not yet forwarded to /flows/phenotype_improvements; ignoring them for now.") + } + res <- if (!is.null(acp_state$url)) { - .acp_post("/tools/phenotype_improvements", body) + .acp_post("/flows/phenotype_improvements", body) } else { local_phenotype_improvements() } - res$artifact <- list(protocolRef = protocolPath, cohortRefs = cohortJsonPaths) + + res$artifact <- list(protocolPath = protocolPath, cohortPaths = cohortJsonPaths) + core <- res$full_result %||% res if (interactive) { - cat("\n== Phenotype Improvements ==\n") - cat(res$plan %||% "", "\n") - if (!is.null(res$mode)) cat(sprintf("Mode: %s\n", res$mode)) - imp <- res$phenotype_improvements %||% list() + cat(" +== Phenotype Improvements == +") + cat(core$plan %||% "", " +") + if (!is.null(core$mode)) cat(sprintf("Mode: %s +", core$mode)) + imp <- core$phenotype_improvements %||% list() if (length(imp) == 0) { - cat(" [stub] No improvements returned (LLM not connected).\n") + cat(" [stub] No improvements returned (LLM not connected). 
+") } else { for (p in imp) { - cat(sprintf(" - [%s] %s\n", + cat(sprintf(" - [%s] %s +", p$targetCohortId %||% "?", p$summary %||% jsonlite::toJSON(p, auto_unbox = TRUE))) } @@ -141,7 +190,7 @@ reviewPhenotypes <- function(protocolPath, } if (apply) { picks <- selectPhenotypeImprovements( - improvements = res$phenotype_improvements, + improvements = core$phenotype_improvements, cohortJsonPaths = cohortJsonPaths, select = select, apply = TRUE, @@ -151,8 +200,12 @@ reviewPhenotypes <- function(protocolPath, res$selected_improvements <- picks$selected res$written <- picks$written if (interactive && length(picks$written)) { - cat("\nSaved improvement notes:\n") - cat(paste(sprintf(" - %s", picks$written), collapse = "\n"), "\n") + cat(" +Saved improvement notes: +") + cat(paste(sprintf(" - %s", picks$written), collapse = " +"), " +") } } res @@ -160,48 +213,57 @@ reviewPhenotypes <- function(protocolPath, #' Select phenotype recommendations (interactive or programmatic) #' @param recommendations list from suggestPhenotypes()$phenotype_recommendations -#' @param select either numeric cohortIds, integer indices, or "all"/NULL to pick all +#' @param select either phenotype ids, integer indices, or "all"/NULL to pick all #' @param interactive if TRUE and select is NULL, prompt user -#' @return integer vector of chosen cohortIds +#' @return character vector of chosen phenotype ids selectPhenotypeRecommendations <- function(recommendations, select = NULL, interactive = interactive()) { recs <- recommendations %||% list() - if (length(recs) == 0) return(integer(0)) + if (length(recs) == 0) return(character(0)) - # normalize to cohortIds - ids <- vapply(recs, function(r) r$cohortId %||% NA_real_, numeric(1)) + ids <- vapply(recs, function(r) r$phenotype_id %||% NA_character_, character(1)) if (is.null(select) || identical(select, "all")) { if (interactive) { labels <- vapply(seq_along(recs), function(i) { - sprintf("%s (%s)", recs[[i]]$cohortName %||% "", recs[[i]]$cohortId 
%||% "?") + sprintf("%s (%s)", recs[[i]]$phenotype_name %||% "", recs[[i]]$phenotype_id %||% "?") }, character(1)) picks <- utils::select.list(labels, multiple = TRUE, title = "Select phenotypes to pull") - if (length(picks) == 0) return(integer(0)) + if (length(picks) == 0) return(character(0)) idx <- match(picks, labels) - return(as.integer(ids[idx])) + return(as.character(ids[idx])) } - return(as.integer(ids)) + return(as.character(ids)) } # explicit selection provided if (is.numeric(select)) { - # if they look like indices (<= length), map to ids; else assume cohortIds + # if they look like indices (<= length), map to ids; else assume ids already supplied if (all(select %% 1 == 0) && all(select >= 1) && all(select <= length(ids))) { - return(as.integer(ids[select])) + return(as.character(ids[select])) + } + return(as.character(select)) + } + + if (is.character(select)) { + if (all(select %in% ids)) { + return(as.character(select)) + } + idx <- suppressWarnings(as.integer(select)) + if (!anyNA(idx) && all(idx >= 1) && all(idx <= length(ids))) { + return(as.character(ids[idx])) } - return(as.integer(select)) } - integer(0) + character(0) } #' Select phenotype improvements and optionally persist notes #' @param improvements list from reviewPhenotypes()$phenotype_improvements #' @param cohortJsonPaths character vector of cohort JSON paths -#' @param select optional vector of cohortIds/indices or "all"/NULL to pick all +#' @param select optional vector of phenotype ids, indices, or "all"/NULL to pick all #' @param apply logical; if TRUE, write selected improvements to disk #' @param outputDir directory for notes; defaults to directory of first cohortJsonPath #' @param interactive prompt user selection when select is NULL diff --git a/R/OHDSIAssistant/R/strategus_cohort_methods_shell.R b/R/OHDSIAssistant/R/strategus_cohort_methods_shell.R new file mode 100644 index 0000000..90404cc --- /dev/null +++ b/R/OHDSIAssistant/R/strategus_cohort_methods_shell.R @@ -0,0 
+1,5984 @@ +#' Interactive shell to generate Strategus CohortMethod scripts +#' @param outputDir directory where scripts and artifacts will be written +#' @param acpUrl ACP base URL for cohort-method recommendation calls +#' @param studyIntent study intent text +#' @param targetStatement optional explicit target cohort statement used for phenotype recommendation +#' @param comparatorStatement optional explicit comparator cohort statement used for phenotype recommendation +#' @param outcomeStatement optional explicit outcome cohort statement used for phenotype recommendation +#' @param targetCohortId target cohort definition ID +#' @param comparatorCohortId comparator cohort definition ID +#' @param outcomeCohortIds outcome cohort definition IDs +#' @param comparisonLabel optional label for the target-comparator comparison +#' @param topK number of candidates retrieved from MCP search +#' @param maxResults max phenotypes to show +#' @param candidateLimit max candidates to pass to LLM +#' @param indexDir phenotype index directory (contains definitions/ and catalog.jsonl) +#' @param negativeControlConceptSetId optional negative control concept set ID +#' @param includeCovariateConceptSetId optional covariate include concept set ID +#' @param excludeCovariateConceptSetId optional covariate exclude concept set ID +#' @param analyticSettingsDescription optional free-text analytic settings description +#' @param analyticSettingsDescriptionPath optional path to a text file containing the free-text analytic settings description +#' @param incidenceOutputDir optional Strategus CohortIncidence output directory used for cached target/outcome cohort reuse +#' @param interactive whether to prompt for missing inputs +#' @param bannerPath optional path to ASCII banner +#' @param studyAgentBaseDir base directory to resolve relative paths +#' @param reset when TRUE, delete outputDir before running +#' @param allowCache reuse cached flow artifacts when present +#' @param 
promptOnCache prompt before using cached flow artifacts
#' @param autoApplyImprovements when TRUE, apply improvements without prompting (defaults to TRUE for non-interactive)
#' @param resume when TRUE, prefer cached manual inputs when present
#' @param remapCohortIds when TRUE, assign new local cohort IDs
#' @param cohortIdBase optional starting cohort ID when remapping
#' @return invisible list with output paths
#' @export
# NOTE(review): the roxygen block above (including @export) is immediately
# followed by this internal helper, so roxygen2 would attach it here. It
# presumably documents runStrategusCohortMethodsShell() - confirm placement.

# Dotted setting paths grouped by the review/prompt section they belong to.
# Returns a named list (section id -> character vector of paths); the order
# of both sections and paths is the order used when printing summaries.
.studyAgentAnalyticSettingsSectionPaths <- function() {
  list(
    study_population = c(
      "get_db_cohort_method_data.studyStartDate",
      "get_db_cohort_method_data.studyEndDate",
      "get_db_cohort_method_data.restrictToCommonPeriod",
      "get_db_cohort_method_data.firstExposureOnly",
      "get_db_cohort_method_data.washoutPeriod",
      "create_study_population.removeDuplicateSubjects",
      "create_study_population.censorAtNewRiskWindow",
      "create_study_population.removeSubjectsWithPriorOutcome",
      "create_study_population.priorOutcomeLookback",
      "create_study_population.maxCohortSize"
    ),
    time_at_risk = c(
      "create_study_population.minDaysAtRisk",
      "create_study_population.riskWindowStart",
      "create_study_population.startAnchor",
      "create_study_population.riskWindowEnd",
      "create_study_population.endAnchor"
    ),
    propensity_score_adjustment = c(
      "ps_adjustment.strategy",
      "ps_adjustment.trimmingStrategy",
      "ps_adjustment.trimmingPercent",
      "ps_adjustment.equipoiseLowerBound",
      "ps_adjustment.equipoiseUpperBound",
      "create_ps.maxCohortSizeForFitting",
      "create_ps.errorOnHighCorrelation",
      "create_ps.useRegularization",
      "match_on_ps.caliper",
      "match_on_ps.caliperScale",
      "match_on_ps.maxRatio",
      "stratify_by_ps.numberOfStrata",
      "stratify_by_ps.baseSelection"
    ),
    outcome_model = c(
      "fit_outcome_model.modelType",
      "fit_outcome_model.stratified",
      "fit_outcome_model.useCovariates",
      "fit_outcome_model.inversePtWeighting",
      "fit_outcome_model.useRegularization"
    )
  )
}

# Human-readable title for each section id, as a named character vector.
.studyAgentAnalyticSettingsSectionTitles <- function() {
  c(
    study_population = "Study Population",
    time_at_risk = "Time At Risk",
    propensity_score_adjustment = "Propensity Score Adjustment",
    outcome_model = "Outcome Model"
  )
}

# Display metadata for every known setting path.
# Returns a named list keyed by dotted path; each element is a list with
# `label`, `summary_label` (always equal to `label` here) and, where a prompt
# text exists, `description`.
.studyAgentAnalyticSettingDocs <- function() {
  # Builds one entry; `description` is omitted entirely when not supplied so
  # callers can keep testing it with is.null().
  entry <- function(label, description = NULL) {
    doc <- list(label = label, summary_label = label)
    if (!is.null(description)) doc$description <- description
    doc
  }

  list(
    "get_db_cohort_method_data.studyStartDate" = entry("Study start date"),
    "get_db_cohort_method_data.studyEndDate" = entry("Study end date"),
    "get_db_cohort_method_data.firstExposureOnly" = entry(
      "First exposure only",
      "Should only the first exposure per subject be included?"
    ),
    "get_db_cohort_method_data.washoutPeriod" = entry(
      "Washout period",
      "The minimum required continuous observation time (in days) prior to index date for a person to be included in the cohort."
    ),
    "get_db_cohort_method_data.restrictToCommonPeriod" = entry(
      "Restrict to common period",
      "Restrict the study to the period when both exposures are present in the data? (E.g. when both drugs are on the market)"
    ),
    "get_db_cohort_method_data.removeDuplicateSubjects" = entry(
      "Duplicate subjects during extraction",
      "Controls how people who appear in both target and comparator cohorts are handled while extracting data."
    ),
    "create_study_population.removeDuplicateSubjects" = entry(
      "Remove duplicate subjects",
      "Remove subjects that are in both the target and comparator cohort?"
    ),
    "create_study_population.maxCohortSize" = entry(
      "Maximum cohort size",
      "If either the target or the comparator cohort is larger than this number it will be sampled to this size. (0 for this value indicates no maximum size)"
    ),
    "create_study_population.removeSubjectsWithPriorOutcome" = entry(
      "Remove prior outcomes",
      "Remove subjects that have the outcome prior to the risk window start?"
    ),
    "create_study_population.priorOutcomeLookback" = entry(
      "Prior outcome lookback",
      "How many days should we look back when identifying prior outcomes?"
    ),
    "create_study_population.riskWindowStart" = entry("Risk window start"),
    "create_study_population.minDaysAtRisk" = entry(
      "Minimum days at risk",
      "The minimum number of days at risk?"
    ),
    "create_study_population.startAnchor" = entry("Risk window start anchor"),
    "create_study_population.riskWindowEnd" = entry("Risk window end"),
    "create_study_population.endAnchor" = entry("Risk window end anchor"),
    "create_study_population.censorAtNewRiskWindow" = entry(
      "Censor at new risk window",
      "If a subject is in multiple cohorts, should time-at-risk be censored when the new time-at-risk start to prevent overlap?"
    ),
    "ps_adjustment.strategy" = entry("PS adjustment strategy"),
    "ps_adjustment.trimmingStrategy" = entry(
      "PS trimming",
      "How do you want to trim your cohorts based on the propensity score distribution?"
    ),
    "ps_adjustment.trimmingPercent" = entry(
      "Trimming percent",
      "What percentage of each tail should be removed?"
    ),
    "ps_adjustment.equipoiseLowerBound" = entry(
      "Equipoise lower bound",
      "What is the lower preference score bound for trimming to equipoise?"
    ),
    "ps_adjustment.equipoiseUpperBound" = entry(
      "Equipoise upper bound",
      "What is the upper preference score bound for trimming to equipoise?"
    ),
    "create_ps.estimator" = entry(
      "PS estimator",
      "Defines the treatment effect target used when propensity scores are converted into adjustment weights or summaries."
    ),
    "create_ps.maxCohortSizeForFitting" = entry(
      "Max cohort size for PS fitting",
      "What is the maximum number of people to include in the propensity score model when fitting? Setting this number to 0 means no down-sampling will be applied:"
    ),
    "create_ps.errorOnHighCorrelation" = entry(
      "Test covariate correlation",
      "Test each covariate for correlation with the target assignment? If any covariate has an unusually high correlation (either positive or negative), this will throw an error."
    ),
    "create_ps.useRegularization" = entry(
      "Use regularization",
      "Use regularization when fitting the propensity model?"
    ),
    "match_on_ps.caliper" = entry(
      "Matching caliper",
      "What is the caliper for matching:"
    ),
    "match_on_ps.caliperScale" = entry(
      "Caliper scale",
      "What is the caliper scale:"
    ),
    "match_on_ps.maxRatio" = entry(
      "Maximum match ratio",
      "What is the maximum number of persons in the comparator arm to be matched to each person in the target arm within the defined caliper? (0 = means no maximum - all comparators will be assigned to a target person):"
    ),
    "stratify_by_ps.numberOfStrata" = entry(
      "Number of strata",
      "Into how many strata should the propensity score be divided? The boundaries of the strata are automatically defined to contain equal numbers of target persons:"
    ),
    "stratify_by_ps.baseSelection" = entry(
      "Base selection for strata bounds",
      "What is the base selection of subjects where the strata bounds are to be determined? Strata are defined as equally-sized strata inside this selection."
    ),
    "fit_outcome_model.modelType" = entry("Outcome model"),
    "fit_outcome_model.stratified" = entry(
      "Condition on strata",
      "Should the regression be conditioned on the strata defined in the population object (e.g. by matching or stratifying on propensity scores)?"
    ),
    "fit_outcome_model.useCovariates" = entry(
      "Use covariates in outcome model",
      "Should the covariates also be included in the outcome model?"
    ),
    "fit_outcome_model.inversePtWeighting" = entry(
      "Use IPTW",
      "Use inverse probability of treatment weighting?"
    ),
    "fit_outcome_model.useRegularization" = entry(
      "Use regularization",
      "Use regularization when fitting the outcome model?"
    )
  )
}

# Label used in summaries for `path`: summary_label, else label, else the
# path itself when the path has no documentation entry.
.studyAgentSummaryLabel <- function(path) {
  doc <- .studyAgentAnalyticSettingDocs()[[path]]
  if (is.null(doc)) return(path)
  if (!is.null(doc$summary_label)) return(as.character(doc$summary_label))
  if (!is.null(doc$label)) return(as.character(doc$label))
  path
}

# Trimmed text of the first element of `value`, or "" when it is NULL, empty,
# NA or blank. Only the first element is inspected so a longer vector cannot
# break the scalar condition.
.studyAgentFormatDateForPrompt <- function(value) {
  if (is.null(value) || length(value) == 0 || all(is.na(value[[1]]))) return("")
  text <- trimws(as.character(value[[1]]))
  if (!nzchar(text)) return("")
  text
}

# Render a setting value for display.
#   value: the stored setting (scalar or vector; may be NULL/NA).
#   path:  optional dotted setting path selecting path-specific formatting.
# Returns a single string: "" for missing/blank values, "Yes"/"No" for
# logical scalars, a friendly label for known coded strings, "<n>%" for the
# trimming percent, otherwise the value(s) collapsed with ", ".
.studyAgentFormatAnalyticSettingValue <- function(value, path = NULL) {
  # all() keeps the condition scalar for multi-valued settings.
  if (is.null(value) || length(value) == 0 || all(is.na(value))) return("")
  is_scalar <- length(value) == 1
  if (is.character(value) && is_scalar && !nzchar(trimws(value))) return("")

  key <- if (is.null(path)) "" else path
  date_paths <- c(
    "get_db_cohort_method_data.studyStartDate",
    "get_db_cohort_method_data.studyEndDate"
  )
  if (key %in% date_paths) return(.studyAgentFormatDateForPrompt(value))

  if (is.logical(value) && is_scalar) return(if (isTRUE(value)) "Yes" else "No")

  if (is.character(value) && is_scalar) {
    anchor_labels <- c("cohort start" = "cohort start date", "cohort end" = "cohort end date")
    display_maps <- list(
      "create_study_population.startAnchor" = anchor_labels,
      "create_study_population.endAnchor" = anchor_labels,
      "ps_adjustment.strategy" = c("match_on_ps" = "Match on propensity score", "stratify_by_ps" = "Stratify on propensity score", "none" = "None"),
      "ps_adjustment.trimmingStrategy" = c("none" = "None", "by_percent" = "By percent", "by_equipoise" = "By equipoise"),
      "match_on_ps.caliperScale" = c("propensity score" = "Propensity score", "standardized" = "Standardized", "standardized logit" = "Standardized logit"),
      "fit_outcome_model.modelType" = c("cox" = "Cox proportional hazards", "poisson" = "Poisson regression", "logistic" = "Logistic regression"),
      "create_study_population.removeDuplicateSubjects" = c("keep all" = "Keep All", "keep first" = "Keep First", "remove all" = "Remove All"),
      "get_db_cohort_method_data.removeDuplicateSubjects" = c("keep all" = "Keep All", "keep first" = "Keep First", "remove all" = "Remove All", "keep first, truncate to second" = "Keep First, Truncate to Second"),
      "stratify_by_ps.baseSelection" = c("all" = "Entire study population", "target" = "Target", "comparator" = "Comparator")
    )
    if (key %in% names(display_maps)) {
      labels <- display_maps[[key]]
      # Membership test first: `[[` on a named character vector raises
      # "subscript out of bounds" for an unknown code, so unmapped values
      # must fall through and be shown verbatim.
      if (value %in% names(labels)) return(labels[[value]])
    }
  }

  if (identical(key, "ps_adjustment.trimmingPercent") && is.numeric(value) && is_scalar) {
    return(sprintf("%s%%", formatC(as.numeric(value), format = "fg", digits = 6)))
  }
  if (is.numeric(value) && is_scalar) return(as.character(value))
  paste(as.character(value), collapse = ", ")
}

# Default outcome-model settings derived from the PS adjustment choice.
# `stratified` defaults to TRUE when stratifying on PS, or when matching with
# a max ratio other than 1; every other field is a fixed default.
.studyAgentOutcomeModelDefaults <- function(ps_strategy = "match_on_ps",
                                            match_max_ratio = 1L,
                                            model_type = "cox") {
  strategy <- as.character(if (is.null(ps_strategy)) "match_on_ps" else ps_strategy)
  # Use only the first ratio so a vector cannot break the scalar test below;
  # `[1]` on an empty vector yields NA, which also falls back to 1.
  ratio <- suppressWarnings(as.integer(if (is.null(match_max_ratio)) 1L else match_max_ratio))[1]
  if (is.na(ratio)) ratio <- 1L

  stratified_default <- identical(strategy, "stratify_by_ps") ||
    (identical(strategy, "match_on_ps") && ratio != 1L)

  list(
    modelType = as.character(if (is.null(model_type)) "cox" else model_type),
    stratified = isTRUE(stratified_default),
    useCovariates = FALSE,
    inversePtWeighting = FALSE,
    useRegularization = TRUE
  )
}

# Print `header` followed by one " - label: value" line per path, reading
# the values from `defaults`.
.studyAgentPrintDefaultSummary <- function(header, defaults, paths) {
  cat(sprintf("%s\n", header))
  for (path in paths) {
    shown <- .studyAgentFormatAnalyticSettingValue(
      .studyAgentGetNestedValue(defaults, path),
      path = path
    )
    cat(sprintf(" - %s: %s\n", .studyAgentSummaryLabel(path), shown))
  }
}

# Show `question` and the default values for `paths`, then return whatever
# `io_ask_yesno` answers to the keep-defaults prompt (default TRUE).
.studyAgentPromptKeepDefaults <- function(question, defaults, paths, io_ask_yesno) {
  cat(sprintf("%s\n", question))
  .studyAgentPrintDefaultSummary("Default settings:", defaults, paths)
  io_ask_yesno("Keep these defaults? Choose No if you want to set the remaining options yourself.", default = TRUE)
}

# Print the documented description for `path`, if it has a non-blank one.
# Always returns invisible NULL.
.studyAgentPrintAnalyticSettingDescription <- function(path) {
  doc <- .studyAgentAnalyticSettingDocs()[[path]]
  if (is.null(doc) || is.null(doc$description)) return(invisible(NULL))
  description <- trimws(as.character(doc$description))
  if (nzchar(description)) cat(sprintf("%s\n", description))
  invisible(NULL)
}

# Prompt for one setting and return `working` with the answer stored.
#   working: nested settings list.
#   path:    dotted path of the setting to customize.
#   ask_*:   prompt callbacks supplied by the caller; each is called with an
#            empty prompt text because the description is printed first.
# Choosing a trimming strategy additionally prompts for (or resets) the
# dependent percent/equipoise values. Stops for paths without a prompt.
.studyAgentPromptAnalyticSetting <- function(working,
                                             path,
                                             ask_yesno,
                                             ask_choice,
                                             ask_integer,
                                             ask_numeric) {
  `%||%` <- function(x, y) if (is.null(x)) y else x

  yes_no_paths <- c(
    "get_db_cohort_method_data.restrictToCommonPeriod",
    "get_db_cohort_method_data.firstExposureOnly",
    "create_study_population.censorAtNewRiskWindow",
    "create_study_population.removeSubjectsWithPriorOutcome",
    "create_ps.errorOnHighCorrelation",
    "create_ps.useRegularization",
    "fit_outcome_model.stratified",
    "fit_outcome_model.useCovariates",
    "fit_outcome_model.inversePtWeighting",
    "fit_outcome_model.useRegularization"
  )
  integer_paths <- c(
    "get_db_cohort_method_data.washoutPeriod",
    "create_study_population.priorOutcomeLookback",
    "create_study_population.maxCohortSize",
    "create_study_population.minDaysAtRisk",
    "create_ps.maxCohortSizeForFitting"
  )
  # Numeric settings with the value offered when nothing is stored yet.
  numeric_fallbacks <- list(
    "ps_adjustment.trimmingPercent" = 5,
    "ps_adjustment.equipoiseLowerBound" = 0.25,
    "ps_adjustment.equipoiseUpperBound" = 0.75
  )
  choice_specs <- list(
    "create_study_population.removeDuplicateSubjects" = list(
      choices = c("keep all", "keep first", "remove all"),
      labels = c("Keep All", "Keep First", "Remove All"),
      fallback = "keep all"
    ),
    "ps_adjustment.trimmingStrategy" = list(
      choices = c("none", "by_percent", "by_equipoise"),
      labels = c("None", "By percent", "By equipoise"),
      fallback = "none"
    ),
    "match_on_ps.caliperScale" = list(
      choices = c("propensity score", "standardized", "standardized logit"),
      labels = c("Propensity score", "Standardized", "Standardized logit"),
      fallback = "standardized logit"
    ),
    "stratify_by_ps.baseSelection" = list(
      choices = c("all", "target", "comparator"),
      labels = c("Entire study population", "Target", "Comparator"),
      fallback = "all"
    )
  )

  prompt_numeric <- function(settings, key, fallback = NULL) {
    .studyAgentSetNestedValue(
      settings,
      key,
      ask_numeric(
        "",
        default = as.numeric(.studyAgentGetNestedValue(settings, key) %||% fallback),
        min_value = 0
      )
    )
  }
  prompt_choice <- function(settings, key) {
    spec <- choice_specs[[key]]
    .studyAgentSetNestedValue(
      settings,
      key,
      ask_choice(
        "",
        choices = spec$choices,
        labels = spec$labels,
        default = .studyAgentGetNestedValue(settings, key) %||% spec$fallback
      )
    )
  }

  .studyAgentPrintAnalyticSettingDescription(path)

  if (path %in% yes_no_paths) {
    return(.studyAgentSetNestedValue(
      working,
      path,
      ask_yesno("", default = isTRUE(.studyAgentGetNestedValue(working, path)))
    ))
  }
  if (path %in% integer_paths) {
    return(.studyAgentSetNestedValue(
      working,
      path,
      ask_integer(
        "",
        default = as.integer(.studyAgentGetNestedValue(working, path)),
        min_value = 0L,
        allow_negative = FALSE
      )
    ))
  }
  if (identical(path, "ps_adjustment.trimmingStrategy")) {
    updated <- prompt_choice(working, path)
    selected <- .studyAgentGetNestedValue(updated, path) %||% "none"
    if (identical(selected, "by_percent")) {
      updated <- prompt_numeric(updated, "ps_adjustment.trimmingPercent", 5)
      # Values of the strategy that was not chosen go back to their defaults.
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.equipoiseLowerBound", 0.25)
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.equipoiseUpperBound", 0.75)
    } else if (identical(selected, "by_equipoise")) {
      updated <- prompt_numeric(updated, "ps_adjustment.equipoiseLowerBound", 0.25)
      updated <- prompt_numeric(updated, "ps_adjustment.equipoiseUpperBound", 0.75)
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.trimmingPercent", 5)
    } else {
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.trimmingPercent", 5)
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.equipoiseLowerBound", 0.25)
      updated <- .studyAgentSetNestedValue(updated, "ps_adjustment.equipoiseUpperBound", 0.75)
    }
    return(updated)
  }
  if (path %in% names(choice_specs)) return(prompt_choice(working, path))
  if (path %in% names(numeric_fallbacks)) {
    return(prompt_numeric(working, path, numeric_fallbacks[[path]]))
  }
  # The caliper has no fallback: a missing value is offered as numeric(0).
  if (identical(path, "match_on_ps.caliper")) return(prompt_numeric(working, path))

  stop(sprintf("Unsupported analytic setting customization path: %s", path))
}

# Prompt for every path in `paths`, in order, threading the settings through.
.studyAgentCustomizeAnalyticSettings <- function(working,
                                                 paths,
                                                 ask_yesno,
                                                 ask_choice,
                                                 ask_integer,
                                                 ask_numeric) {
  updated <- working
  for (path in paths) {
    updated <- .studyAgentPromptAnalyticSetting(
      updated,
      path,
      ask_yesno = ask_yesno,
      ask_choice = ask_choice,
      ask_integer = ask_integer,
      ask_numeric = ask_numeric
    )
  }
  updated
}

# Paths to display for a section given the current settings.
# All sections except "propensity_score_adjustment" return their configured
# paths unchanged. The PS section is filtered to the options relevant to the
# chosen trimming strategy and adjustment strategy.
.studyAgentSummaryPathsForSection <- function(section_name, section_paths, settings) {
  if (!identical(section_name, "propensity_score_adjustment")) {
    return(section_paths[[section_name]])
  }

  trim_strategy <- .studyAgentGetNestedValue(settings, "ps_adjustment.trimmingStrategy")
  trim_paths <- "ps_adjustment.trimmingStrategy"
  if (identical(trim_strategy, "by_percent")) {
    trim_paths <- c(trim_paths, "ps_adjustment.trimmingPercent")
  } else if (identical(trim_strategy, "by_equipoise")) {
    trim_paths <- c(trim_paths, "ps_adjustment.equipoiseLowerBound", "ps_adjustment.equipoiseUpperBound")
  }
  base_paths <- c(trim_paths, "ps_adjustment.strategy")

  strategy <- .studyAgentGetNestedValue(settings, "ps_adjustment.strategy")
  strategy_paths <- if (identical(strategy, "match_on_ps")) {
    c("match_on_ps.maxRatio", "match_on_ps.caliper", "match_on_ps.caliperScale")
  } else if (identical(strategy, "stratify_by_ps")) {
    c("stratify_by_ps.numberOfStrata", "stratify_by_ps.baseSelection")
  } else {
    # No PS adjustment: the PS-model options are not shown either.
    return(base_paths)
  }

  c(
    base_paths,
    "create_ps.maxCohortSizeForFitting",
    "create_ps.errorOnHighCorrelation",
    "create_ps.useRegularization",
    strategy_paths
  )
}

# Print the profile name and, per section, every relevant setting with its
# formatted value. Sections without a registered title are shown under their
# id with underscores replaced by spaces.
.studyAgentPrintFinalSettingsSummary <- function(settings, section_paths) {
  section_titles <- .studyAgentAnalyticSettingsSectionTitles()
  cat("\nFinal analytic settings\n")
  cat(sprintf("Profile name: %s\n", .studyAgentFormatAnalyticSettingValue(settings$profile_name)))
  for (section_name in names(section_paths)) {
    # `[[` on a character vector errors for an unknown name instead of
    # returning NULL, so test membership before indexing.
    title <- if (section_name %in% names(section_titles)) {
      section_titles[[section_name]]
    } else {
      gsub("_", " ", section_name, fixed = TRUE)
    }
    cat(sprintf("[%s]\n", title))
    for (path in .studyAgentSummaryPathsForSection(section_name, section_paths, settings)) {
      shown <- .studyAgentFormatAnalyticSettingValue(.studyAgentGetNestedValue(settings, path), path = path)
      cat(sprintf(" - %s: %s\n", .studyAgentSummaryLabel(path), shown))
    }
  }
}

# Serialize a setting value for the editable review file: "" when missing,
# "true"/"false" for logical scalars, otherwise the value(s) as text joined
# with ", ".
.studyAgentValueForReviewFile <- function(value) {
  if (is.null(value) || length(value) == 0 || all(is.na(value))) return("")
  if (is.logical(value) && length(value) == 1) return(if (isTRUE(value)) "true" else "false")
  if (length(value) == 1) return(as.character(value))
  paste(as.character(value), collapse = ", ")
}

# Convert text read from the review file back to the type of `current_value`.
#   value:         text after the ':' (may be NULL or blank).
#   current_value: the setting's current value; its type drives the coercion.
# Logical and numeric scalars that cannot be parsed keep `current_value` and
# raise a warning, so a typo never changes the type of a setting. A blank
# numeric still becomes NA; anything else is returned as trimmed text.
.studyAgentCoerceReviewValue <- function(value, current_value) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
  text <- trimws(as.character(value %||% ""))
  text <- if (length(text) == 0) "" else text[[1]]
  current_is_scalar <- length(current_value) == 1

  if (!nzchar(text)) {
    if (is.character(current_value) && current_is_scalar && !nzchar(trimws(current_value))) return("")
    if (length(current_value) == 0 || all(is.na(current_value))) return(NA)
  }

  if (is.logical(current_value) && current_is_scalar) {
    normalized <- tolower(text)
    if (normalized %in% c("true", "t", "yes", "y", "1")) return(TRUE)
    if (normalized %in% c("false", "f", "no", "n", "0")) return(FALSE)
    warning(sprintf("Ignoring invalid boolean value '%s'; keeping the current value.", text), call. = FALSE)
    return(current_value)
  }

  if (is.numeric(current_value) && current_is_scalar) {
    convert <- if (is.integer(current_value)) as.integer else as.numeric
    parsed <- suppressWarnings(convert(text))
    if (is.na(parsed) && nzchar(text)) {
      warning(sprintf("Ignoring invalid numeric value '%s'; keeping the current value.", text), call. = FALSE)
      return(current_value)
    }
    return(parsed)
  }

  text
}

# Write the settings as an editable "key: value" text file at `path`, grouped
# under "[Section title]" headers. Returns `path` invisibly.
.studyAgentWriteAnalyticSettingsReviewFile <- function(settings, section_paths, path) {
  section_titles <- .studyAgentAnalyticSettingsSectionTitles()
  lines <- c(
    "# Edit values after ':' only. Save this file, close the editor, then return to the R shell.",
    "# Blank study dates are allowed. Boolean values accept true/false.",
    "",
    sprintf("profile_name: %s", .studyAgentValueForReviewFile(settings$profile_name)),
    ""
  )
  for (section_name in names(section_paths)) {
    # Unknown sections fall back to their id; see the membership note in
    # .studyAgentPrintFinalSettingsSummary().
    title <- if (section_name %in% names(section_titles)) section_titles[[section_name]] else section_name
    entries <- vapply(
      .studyAgentSummaryPathsForSection(section_name, section_paths, settings),
      function(path_key) {
        sprintf(
          "%s: %s",
          path_key,
          .studyAgentValueForReviewFile(.studyAgentGetNestedValue(settings, path_key))
        )
      },
      character(1),
      USE.NAMES = FALSE
    )
    lines <- c(lines, sprintf("[%s]", title), entries, "")
  }
  writeLines(lines, con = path, useBytes = TRUE)
  invisible(path)
}

# Read an edited review file and return `settings` with every "key: value"
# line applied. Blank lines, '#' comments, "[...]" headers and lines without
# a ':' are skipped; values are coerced to the type of the current setting.
.studyAgentReadAnalyticSettingsReviewFile <- function(path, settings) {
  updated <- settings
  for (line in readLines(path, warn = FALSE)) {
    stripped <- trimws(line)
    if (!nzchar(stripped) || startsWith(stripped, "#") || grepl("^\\[[^]]+\\]$", stripped)) next
    if (!grepl(":", stripped, fixed = TRUE)) next
    # Split on the first ':' only, so values may themselves contain colons.
    key <- trimws(sub(":.*$", "", stripped))
    value <- trimws(sub("^[^:]*:", "", stripped))
    if (identical(key, "profile_name")) {
      updated$profile_name <- value
      next
    }
    updated <- .studyAgentSetNestedValue(
      updated,
      key,
      .studyAgentCoerceReviewValue(value, .studyAgentGetNestedValue(updated, key))
    )
  }
  updated
}

# Built-in default analytic settings for the manual shell.
# `covariate_enabled` only controls covariate_concept_sets$enabled.
.studyAgentDefaultCohortMethodAnalyticSettings <- function(covariate_enabled = FALSE) {
  list(
    profile_name = "Analytic Setting 1",
    source = "manual_shell",
    customized_sections = character(0),
    get_db_cohort_method_data = list(
      studyStartDate = "",
      studyEndDate = "",
      firstExposureOnly = TRUE,
      washoutPeriod = 365L,
      restrictToCommonPeriod = TRUE,
      removeDuplicateSubjects = "keep first, truncate to second"
    ),
    create_study_population = list(
      maxCohortSize = 0L,
      removeDuplicateSubjects = "keep all",
      removeSubjectsWithPriorOutcome = TRUE,
      priorOutcomeLookback = 99999L,
      minDaysAtRisk = 1L,
      riskWindowStart = 0L,
      startAnchor = "cohort start",
      riskWindowEnd = 0L,
      endAnchor = "cohort end",
      censorAtNewRiskWindow = FALSE
    ),
    create_ps = list(
      estimator = "att",
      maxCohortSizeForFitting = 250000L,
      errorOnHighCorrelation = FALSE,
      useRegularization = TRUE
    ),
    ps_adjustment = list(
      strategy = "match_on_ps",
      trimmingStrategy = "none",
      trimmingPercent = 5,
      equipoiseLowerBound = 0.25,
      equipoiseUpperBound = 0.75
    ),
    match_on_ps = list(
      caliper = 0.2,
      caliperScale = "standardized logit",
      maxRatio = 1L
    ),
    stratify_by_ps = list(
      numberOfStrata = 10L,
      baseSelection = "all"
    ),
    fit_outcome_model = list(
      modelType = "cox",
      stratified = FALSE,
      useCovariates = FALSE,
      inversePtWeighting = FALSE,
      useRegularization = TRUE
    ),
    covariate_concept_sets = list(
      enabled = isTRUE(covariate_enabled),
      include_all_concepts = TRUE,
      include_concept_set_id = NA_integer_,
      exclude_concept_set_id = NA_integer_
    )
  )
}

# Look up a dotted `path` in nested list `x`. Returns NULL as soon as a level
# is not a list or the key is absent.
.studyAgentGetNestedValue <- function(x, path) {
  current <- x
  for (part in strsplit(path, ".", fixed = TRUE)[[1]]) {
    if (!is.list(current) || is.null(current[[part]])) return(NULL)
    current <- current[[part]]
  }
  current
}

# Return a copy of `x` with `value` stored at dotted `path`. Intermediate
# levels that are missing or not lists are replaced by new lists. Assigning
# NULL removes the leaf, as usual for R lists.
.studyAgentSetNestedValue <- function(x, path, value) {
  parts <- strsplit(path, ".", fixed = TRUE)[[1]]

  set_rec <- function(obj, idx = 1L) {
    key <- parts[[idx]]
    if (idx == length(parts)) {
      obj[[key]] <- value
      return(obj)
    }
    child <- obj[[key]]
    if (!is.list(child)) child <- list()
    obj[[key]] <- set_rec(child, idx + 1L)
    obj
  }

  set_rec(x, 1L)
}

# Copy the values at `paths` from `default_settings` into `current_settings`.
.studyAgentResetSectionPaths <- function(current_settings, default_settings, paths) {
  updated <- current_settings
  for (path in paths) {
    updated <- .studyAgentSetNestedValue(
      updated,
      path,
      .studyAgentGetNestedValue(default_settings, path)
    )
  }
  updated
}

# Recursively overlay `overrides` on `defaults`. Nested lists are merged key
# by key (data frames count as leaves); NULL overrides are ignored.
.studyAgentDeepMerge <- function(defaults, overrides) {
  if (is.null(overrides)) return(defaults)
  for (name in names(overrides)) {
    override_value <- overrides[[name]]
    default_value <- defaults[[name]]
    if (is.list(default_value) && is.list(override_value) && !is.data.frame(override_value)) {
      defaults[[name]] <- .studyAgentDeepMerge(default_value, override_value)
    } else if (!is.null(override_value)) {
      defaults[[name]] <- override_value
    }
  }
  defaults
}

# Validate a study date. Returns "" for NULL/empty/NA/blank input, the
# trimmed text when it is exactly eight digits, and stops with a message
# naming `label` otherwise. Only the first element of `value` is used.
.studyAgentDateStringOrEmpty <- function(value, label) {
  if (is.null(value) || length(value) == 0 || all(is.na(value[[1]]))) return("")
  text <- trimws(as.character(value[[1]]))
  if (!nzchar(text)) return("")
  if (grepl("^[0-9]{8}$", text)) return(text)
  stop(sprintf("%s must be blank or formatted as YYYYMMDD.", label))
}

# Built-in cmAnalysis template used when no template file is supplied.
.studyAgentDefaultCmAnalysisTemplate <- function() {
  laplace_prior <- list(
    priorType = "laplace",
    useCrossValidation = TRUE
  )
  # The two control blocks differ only in their noise level.
  fit_control <- function(noise_level) {
    list(
      tolerance = 2e-7,
      cvType = "auto",
      fold = 10L,
      cvRepetitions = 10L,
      noiseLevel = noise_level,
      resetCoefficients = TRUE,
      startingVariance = 0.01
    )
  }

  list(
    description = "",
    getDbCohortMethodDataArgs = list(
      studyStartDate = "",
      studyEndDate = "",
      firstExposureOnly = FALSE,
      removeDuplicateSubjects = "keep all",
      restrictToCommonPeriod = FALSE,
      washoutPeriod = 365L,
      maxCohortSize = 0L
    ),
    createStudyPopArgs = list(
      removeSubjectsWithPriorOutcome = TRUE,
      priorOutcomeLookback = 99999L,
      minDaysAtRisk = 1L,
      riskWindowStart = 1L,
      startAnchor = "cohort start",
      riskWindowEnd = 0L,
      endAnchor = "cohort end",
      censorAtNewRiskWindow = FALSE
    ),
    trimByPsArgs = list(
      trimFraction = 0.05,
      equipoiseBounds = NA
    ),
    matchOnPsArgs = list(
      maxRatio = 1L,
      caliper = 0.2,
      caliperScale = "standardized logit"
    ),
    stratifyByPsArgs = NA,
    createPsArgs = list(
      maxCohortSizeForFitting = 250000L,
      errorOnHighCorrelation = TRUE,
      prior = laplace_prior,
      control = fit_control("silent")
    ),
    fitOutcomeModelArgs = list(
      modelType = "cox",
      stratified = FALSE,
      useCovariates = FALSE,
      inversePtWeighting = FALSE,
      prior = laplace_prior,
      control = fit_control("quiet")
    )
  )
}

# Return the built-in template, overlaid with the JSON file at
# `template_path` when that path is given and the file exists.
.studyAgentLoadCmAnalysisTemplate <- function(template_path = NULL) {
  template <- .studyAgentDefaultCmAnalysisTemplate()
  if (!is.null(template_path) && length(template_path) > 0 && !is.na(template_path) && nzchar(template_path) && file.exists(template_path)) {
    loaded <- jsonlite::fromJSON(template_path, simplifyVector = FALSE)
    template <- .studyAgentDeepMerge(template, loaded)
  }
  template
}

# Translate shell settings into the cmAnalysis list structure.
#   settings: nested analytic settings list.
#   template: cmAnalysis template; only its prior/control blocks are used, and
#             only when the matching useRegularization flag is TRUE.
# Argument groups that do not apply to the chosen strategy are emitted as NA.
# NOTE(review): maxCohortSize is read from create_study_population but
# written under getDbCohortMethodDataArgs, and
# create_study_population.removeDuplicateSubjects is never emitted although
# the shell prompts for it - confirm both are intended.
.studyAgentBuildCmAnalysisJson <- function(settings, template = NULL) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
  template <- template %||% .studyAgentDefaultCmAnalysisTemplate()

  db_args <- settings$get_db_cohort_method_data
  pop_args <- settings$create_study_population
  ps_settings <- settings$ps_adjustment
  outcome_settings <- settings$fit_outcome_model

  ps_strategy <- ps_settings$strategy %||% "match_on_ps"
  trimming_strategy <- ps_settings$trimmingStrategy %||% "none"
  ps_regularized <- isTRUE(settings$create_ps$useRegularization)
  outcome_regularized <- isTRUE(outcome_settings$useRegularization)

  trim_args <- NA
  if (identical(trimming_strategy, "by_percent")) {
    trim_args <- list(
      # Stored as a percentage, emitted as a fraction.
      trimFraction = as.numeric(ps_settings$trimmingPercent) / 100,
      equipoiseBounds = NA
    )
  } else if (identical(trimming_strategy, "by_equipoise")) {
    trim_args <- list(
      trimFraction = NA_real_,
      equipoiseBounds = c(
        as.numeric(ps_settings$equipoiseLowerBound),
        as.numeric(ps_settings$equipoiseUpperBound)
      )
    )
  }

  match_args <- NA
  if (identical(ps_strategy, "match_on_ps")) {
    match_args <- list(
      maxRatio = as.integer(settings$match_on_ps$maxRatio),
      caliper = as.numeric(settings$match_on_ps$caliper),
      caliperScale = as.character(settings$match_on_ps$caliperScale)
    )
  }

  stratify_args <- NA
  if (identical(ps_strategy, "stratify_by_ps")) {
    stratify_args <- list(
      numberOfStrata = as.integer(settings$stratify_by_ps$numberOfStrata),
      baseSelection = as.character(settings$stratify_by_ps$baseSelection)
    )
  }

  # A propensity model is needed unless neither adjustment nor trimming is on.
  create_ps_args <- NA
  if (!(identical(ps_strategy, "none") && identical(trimming_strategy, "none"))) {
    create_ps_args <- list(
      maxCohortSizeForFitting = as.integer(settings$create_ps$maxCohortSizeForFitting),
      errorOnHighCorrelation = isTRUE(settings$create_ps$errorOnHighCorrelation),
      prior = if (ps_regularized) template$createPsArgs$prior else NA,
      control = if (ps_regularized) template$createPsArgs$control else NA
    )
  }

  list(
    description = as.character(settings$profile_name),
    getDbCohortMethodDataArgs = list(
      studyStartDate = as.character(db_args$studyStartDate %||% ""),
      studyEndDate = as.character(db_args$studyEndDate %||% ""),
      firstExposureOnly = isTRUE(db_args$firstExposureOnly),
      removeDuplicateSubjects = as.character(db_args$removeDuplicateSubjects),
      restrictToCommonPeriod = isTRUE(db_args$restrictToCommonPeriod),
      washoutPeriod = as.integer(db_args$washoutPeriod),
      maxCohortSize = as.integer(pop_args$maxCohortSize)
    ),
    createStudyPopArgs = list(
      removeSubjectsWithPriorOutcome = isTRUE(pop_args$removeSubjectsWithPriorOutcome),
      priorOutcomeLookback = as.integer(pop_args$priorOutcomeLookback),
      minDaysAtRisk = as.integer(pop_args$minDaysAtRisk),
      riskWindowStart = as.integer(pop_args$riskWindowStart),
      startAnchor = as.character(pop_args$startAnchor),
      riskWindowEnd = as.integer(pop_args$riskWindowEnd),
      endAnchor = as.character(pop_args$endAnchor),
      censorAtNewRiskWindow = isTRUE(pop_args$censorAtNewRiskWindow)
    ),
    trimByPsArgs = trim_args,
    matchOnPsArgs = match_args,
    stratifyByPsArgs = stratify_args,
    createPsArgs = create_ps_args,
    fitOutcomeModelArgs = list(
      modelType = as.character(outcome_settings$modelType),
      stratified = isTRUE(outcome_settings$stratified),
      useCovariates = isTRUE(outcome_settings$useCovariates),
      inversePtWeighting = isTRUE(outcome_settings$inversePtWeighting),
      prior = if (outcome_regularized) template$fitOutcomeModelArgs$prior else NA,
      control = if (outcome_regularized) template$fitOutcomeModelArgs$control else NA
    )
  )
}

.studyAgentCollectStepByStepAnalyticSettings <- function(default_settings,
                                                         seed_settings,
                                                         interactive = TRUE,
                                                         io = NULL) {
  `%||%` <- function(x, y) if (is.null(x)) y else x

  normalize_seed <- function(settings) {
    settings <- settings %||% list()
    if (is.null(settings$ps_adjustment)) {
      settings$ps_adjustment <- list(strategy = "match_on_ps")
    }
    if (is.null(settings$stratify_by_ps)) {
      settings$stratify_by_ps <- list(
        numberOfStrata = default_settings$stratify_by_ps$numberOfStrata,
        baseSelection = default_settings$stratify_by_ps$baseSelection
      )
    }
    settings
  }

  ask_text <- function(prompt, default = "", allow_blank = FALSE) {
    if (!isTRUE(interactive)) return(default)
    value <- io$text(prompt = prompt, default = default, allow_blank = allow_blank)
    trimmed <- trimws(as.character(value %||% ""))
    if (!nzchar(trimmed) && !isTRUE(allow_blank)) {
      stop(sprintf("A non-empty value is
required for: %s", prompt)) + } + trimmed + } + + ask_yesno <- function(prompt, default = TRUE) { + if (!isTRUE(interactive)) return(default) + io$yesno(prompt = prompt, default = default) + } + + ask_choice <- function(prompt, choices, default, labels = choices) { + if (!isTRUE(interactive)) return(default) + io$choice(prompt = prompt, choices = choices, default = default, labels = labels) + } + + ask_integer <- function(prompt, default, min_value = NULL, allow_negative = TRUE) { + if (!isTRUE(interactive)) return(as.integer(default)) + io$integer( + prompt = prompt, + default = default, + min_value = min_value, + allow_negative = allow_negative + ) + } + + ask_numeric <- function(prompt, default, min_value = NULL) { + if (!isTRUE(interactive)) return(as.numeric(default)) + io$numeric(prompt = prompt, default = default, min_value = min_value) + } + + section_paths <- .studyAgentAnalyticSettingsSectionPaths() + working <- .studyAgentDeepMerge(default_settings, normalize_seed(seed_settings)) + working <- .studyAgentSetNestedValue( + working, + "get_db_cohort_method_data.removeDuplicateSubjects", + .studyAgentGetNestedValue(default_settings, "get_db_cohort_method_data.removeDuplicateSubjects") + ) + working <- .studyAgentSetNestedValue( + working, + "create_ps.estimator", + .studyAgentGetNestedValue(default_settings, "create_ps.estimator") + ) + working <- .studyAgentSetNestedValue( + working, + "create_ps.errorOnHighCorrelation", + isTRUE(.studyAgentGetNestedValue(default_settings, "create_ps.errorOnHighCorrelation")) + ) + working <- .studyAgentSetNestedValue( + working, + "create_ps.useRegularization", + isTRUE(.studyAgentGetNestedValue(default_settings, "create_ps.useRegularization")) + ) + working <- .studyAgentSetNestedValue( + working, + "stratify_by_ps.baseSelection", + .studyAgentGetNestedValue(default_settings, "stratify_by_ps.baseSelection") + ) + working$source <- "manual_shell" + working$customized_sections <- character(0) + + show_section <- 
function(label) { + if (isTRUE(interactive) && !is.null(io$section_header)) { + io$section_header(label) + } + } + + show_section("Study Population") + study_start <- ask_text( + "Study start date (YYYYMMDD, leave blank for no restriction)", + default = .studyAgentFormatDateForPrompt(.studyAgentGetNestedValue(working, "get_db_cohort_method_data.studyStartDate")), + allow_blank = TRUE + ) + study_end <- ask_text( + "Study end date (YYYYMMDD, leave blank for no restriction)", + default = .studyAgentFormatDateForPrompt(.studyAgentGetNestedValue(working, "get_db_cohort_method_data.studyEndDate")), + allow_blank = TRUE + ) + working <- .studyAgentSetNestedValue( + working, + "get_db_cohort_method_data.studyStartDate", + .studyAgentDateStringOrEmpty(study_start, "Study start date") + ) + working <- .studyAgentSetNestedValue( + working, + "get_db_cohort_method_data.studyEndDate", + .studyAgentDateStringOrEmpty(study_end, "Study end date") + ) + study_population_non_core <- setdiff( + section_paths$study_population, + c( + "get_db_cohort_method_data.studyStartDate", + "get_db_cohort_method_data.studyEndDate" + ) + ) + if (isTRUE(interactive)) { + keep_study_population_defaults <- .studyAgentPromptKeepDefaults( + "For the remaining study population settings, keep the defaults or choose each option yourself?", + default_settings, + study_population_non_core + , + ask_yesno + ) + if (isTRUE(keep_study_population_defaults)) { + working <- .studyAgentResetSectionPaths(working, default_settings, study_population_non_core) + } else { + working <- .studyAgentCustomizeAnalyticSettings( + working, + study_population_non_core, + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + } + + show_section("Time At Risk") + anchor_choices <- c("cohort start", "cohort end") + anchor_labels <- c("cohort start date", "cohort end date") + working <- .studyAgentSetNestedValue( + working, + "create_study_population.startAnchor", + 
ask_choice( + "Risk window start anchor", + choices = anchor_choices, + labels = anchor_labels, + default = .studyAgentGetNestedValue(working, "create_study_population.startAnchor") %||% anchor_choices[[1]] + ) + ) + working <- .studyAgentSetNestedValue( + working, + "create_study_population.riskWindowStart", + ask_integer( + "Risk window start (days)", + default = as.integer(.studyAgentGetNestedValue(working, "create_study_population.riskWindowStart")), + allow_negative = TRUE + ) + ) + working <- .studyAgentSetNestedValue( + working, + "create_study_population.endAnchor", + ask_choice( + "Risk window end anchor", + choices = anchor_choices, + labels = anchor_labels, + default = .studyAgentGetNestedValue(working, "create_study_population.endAnchor") %||% anchor_choices[[2]] + ) + ) + working <- .studyAgentSetNestedValue( + working, + "create_study_population.riskWindowEnd", + ask_integer( + "Risk window end (days)", + default = as.integer(.studyAgentGetNestedValue(working, "create_study_population.riskWindowEnd")), + allow_negative = TRUE + ) + ) + if (isTRUE(interactive)) { + tar_non_core <- c("create_study_population.minDaysAtRisk") + keep_tar_defaults <- .studyAgentPromptKeepDefaults( + "For the remaining time-at-risk settings, keep the defaults or choose each option yourself?", + default_settings, + tar_non_core, + ask_yesno + ) + if (isTRUE(keep_tar_defaults)) { + working <- .studyAgentResetSectionPaths(working, default_settings, tar_non_core) + } else { + working <- .studyAgentCustomizeAnalyticSettings( + working, + tar_non_core, + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + } + + show_section("Propensity Score Adjustment") + strategy_choices <- c("match_on_ps", "stratify_by_ps", "none") + strategy_labels <- c("Match on propensity score", "Stratify on propensity score", "None") + working <- .studyAgentSetNestedValue( + working, + "ps_adjustment.strategy", + ask_choice( + "PS adjustment 
strategy", + choices = strategy_choices, + labels = strategy_labels, + default = .studyAgentGetNestedValue(working, "ps_adjustment.strategy") %||% strategy_choices[[1]] + ) + ) + current_strategy <- .studyAgentGetNestedValue(working, "ps_adjustment.strategy") %||% "match_on_ps" + if (isTRUE(interactive)) { + if (identical(current_strategy, "none")) { + ps_default_paths <- c( + "ps_adjustment.trimmingStrategy", + "create_ps.maxCohortSizeForFitting", + "create_ps.errorOnHighCorrelation", + "create_ps.useRegularization" + ) + keep_ps_defaults <- .studyAgentPromptKeepDefaults( + "For the remaining propensity score adjustment settings, keep the defaults?", + default_settings, + ps_default_paths, + ask_yesno + ) + if (isTRUE(keep_ps_defaults)) { + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c( + ps_default_paths, + "ps_adjustment.trimmingPercent", + "ps_adjustment.equipoiseLowerBound", + "ps_adjustment.equipoiseUpperBound" + ) + ) + } else { + working <- .studyAgentCustomizeAnalyticSettings( + working, + ps_default_paths, + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c( + "match_on_ps.caliper", + "match_on_ps.caliperScale", + "match_on_ps.maxRatio", + "stratify_by_ps.numberOfStrata", + "stratify_by_ps.baseSelection" + ) + ) + } else if (identical(current_strategy, "match_on_ps")) { + working <- .studyAgentSetNestedValue( + working, + "match_on_ps.maxRatio", + ask_integer( + "Maximum match ratio (0 means no maximum)", + default = as.integer(.studyAgentGetNestedValue(working, "match_on_ps.maxRatio")), + min_value = 0L, + allow_negative = FALSE + ) + ) + ps_default_paths <- c( + "ps_adjustment.trimmingStrategy", + "create_ps.maxCohortSizeForFitting", + "create_ps.errorOnHighCorrelation", + "create_ps.useRegularization", + "match_on_ps.caliper", + "match_on_ps.caliperScale" + ) + keep_ps_defaults <- 
.studyAgentPromptKeepDefaults( + "For the remaining propensity score adjustment settings, keep the defaults?", + default_settings, + ps_default_paths, + ask_yesno + ) + if (isTRUE(keep_ps_defaults)) { + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c( + ps_default_paths, + "ps_adjustment.trimmingPercent", + "ps_adjustment.equipoiseLowerBound", + "ps_adjustment.equipoiseUpperBound" + ) + ) + } else { + working <- .studyAgentCustomizeAnalyticSettings( + working, + ps_default_paths, + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c("stratify_by_ps.numberOfStrata", "stratify_by_ps.baseSelection") + ) + } else if (identical(current_strategy, "stratify_by_ps")) { + working <- .studyAgentSetNestedValue( + working, + "stratify_by_ps.numberOfStrata", + ask_integer( + "Number of strata", + default = as.integer(.studyAgentGetNestedValue(working, "stratify_by_ps.numberOfStrata")), + min_value = 1L, + allow_negative = FALSE + ) + ) + working <- .studyAgentSetNestedValue( + working, + "stratify_by_ps.baseSelection", + .studyAgentGetNestedValue(default_settings, "stratify_by_ps.baseSelection") + ) + ps_default_paths <- c( + "ps_adjustment.trimmingStrategy", + "create_ps.maxCohortSizeForFitting", + "create_ps.errorOnHighCorrelation", + "create_ps.useRegularization", + "stratify_by_ps.baseSelection" + ) + keep_ps_defaults <- .studyAgentPromptKeepDefaults( + "For the remaining propensity score adjustment settings, keep the defaults?", + default_settings, + ps_default_paths, + ask_yesno + ) + if (isTRUE(keep_ps_defaults)) { + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c( + ps_default_paths, + "ps_adjustment.trimmingPercent", + "ps_adjustment.equipoiseLowerBound", + "ps_adjustment.equipoiseUpperBound" + ) + ) + } else { + working <- .studyAgentCustomizeAnalyticSettings( + working, + 
ps_default_paths, + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + working <- .studyAgentResetSectionPaths( + working, + default_settings, + c("match_on_ps.caliper", "match_on_ps.caliperScale", "match_on_ps.maxRatio") + ) + } + } + + show_section("Outcome Model") + outcome_model_defaults <- .studyAgentOutcomeModelDefaults( + ps_strategy = current_strategy, + match_max_ratio = .studyAgentGetNestedValue(working, "match_on_ps.maxRatio"), + model_type = .studyAgentGetNestedValue(working, "fit_outcome_model.modelType") %||% default_settings$fit_outcome_model$modelType + ) + model_choices <- c("cox", "poisson", "logistic") + model_labels <- c("Cox proportional hazards", "Poisson regression", "Logistic regression") + working <- .studyAgentSetNestedValue( + working, + "fit_outcome_model.modelType", + ask_choice( + "Outcome model", + choices = model_choices, + labels = model_labels, + default = .studyAgentGetNestedValue(working, "fit_outcome_model.modelType") %||% model_choices[[1]] + ) + ) + outcome_model_defaults$modelType <- .studyAgentGetNestedValue(working, "fit_outcome_model.modelType") %||% outcome_model_defaults$modelType + outcome_defaults_for_display <- .studyAgentDeepMerge( + default_settings, + list(fit_outcome_model = outcome_model_defaults) + ) + keep_outcome_defaults <- !isTRUE(interactive) + if (isTRUE(interactive)) { + keep_outcome_defaults <- .studyAgentPromptKeepDefaults( + "For the remaining outcome model settings, keep the defaults or choose each option yourself?", + outcome_defaults_for_display, + setdiff(section_paths$outcome_model, "fit_outcome_model.modelType") + , + ask_yesno + ) + if (!isTRUE(keep_outcome_defaults)) { + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.stratified", isTRUE(outcome_model_defaults$stratified)) + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.useCovariates", isTRUE(outcome_model_defaults$useCovariates)) + working <- 
.studyAgentSetNestedValue(working, "fit_outcome_model.inversePtWeighting", isTRUE(outcome_model_defaults$inversePtWeighting)) + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.useRegularization", isTRUE(outcome_model_defaults$useRegularization)) + working <- .studyAgentCustomizeAnalyticSettings( + working, + setdiff(section_paths$outcome_model, "fit_outcome_model.modelType"), + ask_yesno = ask_yesno, + ask_choice = ask_choice, + ask_integer = ask_integer, + ask_numeric = ask_numeric + ) + } + } + if (isTRUE(keep_outcome_defaults)) { + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.stratified", isTRUE(outcome_model_defaults$stratified)) + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.useCovariates", isTRUE(outcome_model_defaults$useCovariates)) + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.inversePtWeighting", isTRUE(outcome_model_defaults$inversePtWeighting)) + working <- .studyAgentSetNestedValue(working, "fit_outcome_model.useRegularization", isTRUE(outcome_model_defaults$useRegularization)) + } + + if (isTRUE(interactive)) { + working$profile_name <- ask_text( + "Analytic settings profile name", + default = as.character(working$profile_name %||% default_settings$profile_name), + allow_blank = FALSE + ) + } + + customized_sections <- names(section_paths)[vapply(names(section_paths), function(section_name) { + paths <- section_paths[[section_name]] + any(vapply(paths, function(path) { + !identical( + .studyAgentGetNestedValue(working, path), + .studyAgentGetNestedValue(default_settings, path) + ) + }, logical(1))) + }, logical(1))] + working$customized_sections <- customized_sections + + list( + settings = working, + section_flow = names(section_paths), + customized_sections = customized_sections + ) +} + +runStrategusCohortMethodsShell <- function(outputDir = "demo-strategus-cohort-methods", + acpUrl = "http://127.0.0.1:8765", + studyIntent = NULL, + targetStatement = NULL, + 
# Maximum number of characters allowed in an analysis label.
analysis_label_max_chars <- 50L

# Trim `value` and, if it is longer than `max_chars`, cut it down so the result
# never exceeds `max_chars` characters.
#
# value:     label text; NULL and zero-length input are treated as "".
# max_chars: character budget for the returned label.
#
# Returns a single string. Labels that fit are returned unchanged (after
# trimming); longer ones end in "..." when the budget leaves room for it.
shorten_analysis_label <- function(value, max_chars = analysis_label_max_chars) {
  value <- trimws(as.character(if (is.null(value)) "" else value))
  # Zero-length input would otherwise make the `if` conditions below fail.
  if (length(value) == 0 || !nzchar(value)) return("")
  if (nchar(value, type = "chars") <= max_chars) return(value)
  # With a budget of three or fewer characters the ellipsis alone would
  # overflow the limit, so fall back to a plain hard cut.
  if (max_chars <= 3L) return(substr(value, 1L, max_chars))
  paste0(substr(value, 1L, max_chars - 3L), "...")
}
# Apply one improvement action to a nested list and return the updated copy.
#
# obj:    nested list to update.
# action: list with `path` (slash-separated location) and `value` (the value
#         to store there). A missing or empty path leaves `obj` unchanged.
#
# Path segments are split on "/" and empty segments are dropped. A segment of
# the form `name[n]` addresses element n of the list stored under `name`.
# Missing containers are created as empty lists, and lists are padded with
# empty lists until position n exists.
apply_action <- function(obj, action) {
  path <- action$path %||% ""
  value <- action$value
  if (!nzchar(path)) return(obj)
  segs <- strsplit(path, "/", fixed = TRUE)[[1]]
  segs <- segs[segs != ""]

  # Recursive setter: consumes one path segment per call and returns the
  # modified copy of `x`.
  set_in <- function(x, segs, value) {
    if (length(segs) == 0) return(value)
    seg <- segs[[1]]
    name <- seg
    idx <- NA_integer_
    # Split a trailing "[n]" off the segment into `name` and `idx`.
    if (grepl("\\[\\d+\\]$", seg)) {
      name <- sub("\\[\\d+\\]$", "", seg)
      idx <- as.integer(sub("^.*\\[(\\d+)\\]$", "\\1", seg))
    }
    # NOTE(review): a bare numeric segment such as "0" has a non-empty `name`,
    # so it is handled here as a list element literally named "0", not as a
    # position -- confirm this is intended.
    if (name != "") {
      if (is.null(x[[name]])) x[[name]] <- list()
      if (length(segs) == 1) {
        # Last segment: store the value itself.
        if (!is.na(idx)) {
          if (length(x[[name]]) < idx) {
            while (length(x[[name]]) < idx) x[[name]][[length(x[[name]]) + 1]] <- list()
          }
          x[[name]][[idx]] <- value
        } else {
          x[[name]] <- value
        }
        return(x)
      }
      # More segments follow: descend into the addressed child.
      if (!is.na(idx)) {
        if (length(x[[name]]) < idx) {
          while (length(x[[name]]) < idx) x[[name]][[length(x[[name]]) + 1]] <- list()
        }
        x[[name]][[idx]] <- set_in(x[[name]][[idx]], segs[-1], value)
      } else {
        x[[name]] <- set_in(x[[name]], segs[-1], value)
      }
      return(x)
    }
    # NOTE(review): `name` is only empty for a segment that is just "[n]";
    # as.integer("[n]") is NA, so this returns `x` unchanged and the positional
    # code below appears unreachable -- confirm the intended handling.
    idx <- suppressWarnings(as.integer(seg))
    if (is.na(idx)) return(x)
    if (idx == 0) idx <- 1
    if (length(x) < idx) {
      while (length(x) < idx) x[[length(x) + 1]] <- list()
    }
    if (length(segs) == 1) {
      x[[idx]] <- value
      return(x)
    }
    x[[idx]] <- set_in(x[[idx]], segs[-1], value)
    x
  }

  set_in(obj, segs, value)
}
# Convert user-supplied cohort IDs into an integer vector.
#
# x: NULL, a numeric vector, or a character vector whose elements may each hold
#    several IDs separated by commas and/or whitespace.
#
# Returns an integer vector. Tokens that are not valid integers become NA so
# callers can filter them; any other input type yields integer(0).
parse_ids <- function(x) {
  if (is.null(x)) return(integer(0))
  # Coercion warnings are suppressed because NA is the signal callers act on;
  # this matches the other ID helpers in this function.
  if (is.numeric(x)) return(suppressWarnings(as.integer(x)))
  if (is.character(x)) {
    tokens <- unlist(strsplit(paste(x, collapse = ","), "[,[:space:]]+"))
    tokens <- tokens[nzchar(trimws(tokens))]
    return(suppressWarnings(as.integer(tokens)))
  }
  integer(0)
}
# Ask a yes/no question and keep asking until the answer is recognised.
#
# prompt:  question text shown before the [Y/n] / [y/N] hint.
# default: value returned for an empty answer, and immediately when the shell
#          is not interactive.
#
# Returns TRUE, FALSE, or `default`.
prompt_yesno_strict <- function(prompt, default = TRUE) {
  if (!isTRUE(interactive)) {
    return(default)
  }

  affirmative <- c("y", "yes", "true", "t")
  negative <- c("n", "no", "false", "f")
  hint <- if (default) "[Y/n]" else "[y/N]"
  label <- trimws(as.character(prompt %||% ""))
  question <- if (nzchar(label)) sprintf("%s %s ", label, hint) else sprintf("%s ", hint)

  while (TRUE) {
    answer <- tolower(trimws(readline(question)))
    if (answer == "") return(default)
    if (answer %in% affirmative) return(TRUE)
    if (answer %in% negative) return(FALSE)
    cat("Please answer with y/yes or n/no.\n")
  }
}
# Prompt for an integer, re-asking until the input is acceptable.
#
# prompt:           text shown to the user.
# default:          value used for an empty answer (shown in brackets); also
#                   returned, coerced to integer, when the shell is not
#                   interactive.
# allow_null:       when TRUE an empty answer returns NULL; non-interactively an
#                   NA default also returns NULL.
# must_be_positive: reject values <= 0.
# allow_negative:   when FALSE reject values < 0.
#
# Returns an integer, or NULL as described above.
prompt_integer <- function(prompt, default = NULL, allow_null = FALSE, must_be_positive = FALSE, allow_negative = TRUE) {
  if (!isTRUE(interactive)) {
    if (is.null(default)) return(NULL)
    if (is.na(default) && allow_null) return(NULL)
    return(as.integer(default))
  }
  prompt_suffix <- if (is.null(default)) "" else sprintf(" [%s]", default)
  repeat {
    prompt_text <- trimws(as.character(prompt %||% ""))
    rendered_prompt <- if (nzchar(prompt_text)) sprintf("%s%s: ", prompt_text, prompt_suffix) else sprintf("%s: ", prompt_suffix)
    entered <- trimws(readline(rendered_prompt))
    if (entered == "") {
      if (allow_null) return(NULL)
      if (is.null(default)) {
        cat("A value is required.\n")
        next
      }
      return(as.integer(default))
    }
    # Parse as numeric first: as.integer("3.7") would silently truncate to 3,
    # so fractional or out-of-range input is rejected explicitly instead.
    parsed <- suppressWarnings(as.numeric(entered))
    is_whole <- !is.na(parsed) && is.finite(parsed) &&
      parsed == trunc(parsed) && abs(parsed) <= .Machine$integer.max
    if (!is_whole) {
      cat("Please enter a valid integer.\n")
      next
    }
    value <- as.integer(parsed)
    if (must_be_positive && value <= 0) {
      cat("Please enter a positive integer.\n")
      next
    }
    if (!allow_negative && value < 0) {
      cat("Please enter a non-negative integer.\n")
      next
    }
    return(value)
  }
}
# Prompt for one value out of a fixed set of choices.
#
# prompt:  text shown to the user.
# choices: character vector of allowed values; matching ignores case and
#          surrounding whitespace.
# default: preferred choice. It is mapped onto its canonical spelling in
#          `choices`; a NULL or unrecognised default falls back to the first
#          choice.
#
# Returns an element of `choices`. When the shell is not interactive the
# canonicalised default is returned without prompting, so both modes now yield
# a value from `choices`.
prompt_enum <- function(prompt, choices, default = NULL) {
  normalized_choices <- tolower(trimws(choices))

  # Resolve the default before the interactivity check so both modes share it.
  if (!is.null(default)) {
    default <- as.character(default)
    default_norm <- tolower(trimws(default))
    default <- if (default_norm %in% normalized_choices) {
      choices[[which(normalized_choices == default_norm)[1]]]
    } else {
      choices[[1]]
    }
  } else {
    default <- choices[[1]]
  }

  if (!isTRUE(interactive)) {
    return(default)
  }

  repeat {
    prompt_text <- trimws(as.character(prompt %||% ""))
    rendered_prompt <- if (nzchar(prompt_text)) sprintf("%s [%s]: ", prompt_text, default) else sprintf("[%s]: ", default)
    entered <- trimws(readline(rendered_prompt))
    if (entered == "") {
      return(default)
    }
    entered_norm <- tolower(trimws(entered))
    match_index <- which(normalized_choices == entered_norm)
    # Exactly one match is required; ambiguous or unknown input is re-asked.
    if (length(match_index) != 1) {
      cat(sprintf("Please enter one of: %s\n", paste(choices, collapse = ", ")))
      next
    }
    return(choices[[match_index[1]]])
  }
}
# Look up one field of a cohort in the phenotype catalog.
#
# cohort_id:  cohort ID to find in `catalog_df$cohortId`.
# catalog_df: catalog data frame with a `cohortId` column.
# field:      name of the column to read.
# fallback:   value returned when nothing usable is found; defaults to
#             "Cohort <id>".
#
# Returns the first matching row's value when it is present, non-NA and
# non-blank; otherwise the fallback.
lookup_catalog_value <- function(cohort_id, catalog_df, field = "name", fallback = NULL) {
  column <- catalog_df[[field]]
  idx <- which(catalog_df$cohortId == as.integer(cohort_id))[1]
  # An unknown column gives NULL; skip it instead of failing on the subscript.
  if (!is.null(column) && !is.na(idx)) {
    value <- column[[idx]]
    # nzchar(NA) is TRUE, so NA has to be excluded explicitly.
    if (!is.null(value) && !is.na(value) && nzchar(trimws(value))) return(value)
  }
  if (is.null(fallback)) sprintf("Cohort %s", cohort_id) else fallback
}
use.names = FALSE)) + } + if (!length(roles) || length(roles) != length(original_ids) || length(roles) != length(cohort_ids)) { + return(NULL) + } + keep <- which(roles == role & !is.na(original_ids) & !is.na(cohort_ids)) + if (length(keep) == 0) return(NULL) + selected_ids <- as.integer(unique(original_ids[keep])) + new_ids <- as.integer(cohort_ids[keep]) + cached_files <- file.path(role_dir, sprintf("%s.json", new_ids)) + if (!all(file.exists(cached_files))) return(NULL) + list(selected_ids = selected_ids, new_ids = new_ids) + } + + prompt_statement <- function(label, default = NULL) { + if (!isTRUE(interactive)) return(default) + default_value <- trimws(as.character(default %||% "")) + entered <- readline(sprintf("%s statement [%s]: ", label, default_value)) + if (nzchar(trimws(entered))) trimws(entered) else default_value + } + + ensure_acp_ready <- function(url) { + has_acp_state <- exists("acp_state", inherits = TRUE) + has_acp_connect <- exists("acp_connect", mode = "function", inherits = TRUE) + has_acp_post <- exists(".acp_post", mode = "function", inherits = TRUE) + if (!has_acp_state || !has_acp_connect || !has_acp_post) return(FALSE) + acp_state_value <- get("acp_state", inherits = TRUE) + if (!is.null(acp_state_value$url)) return(TRUE) + if (is.null(url) || !nzchar(trimws(url))) return(FALSE) + tryCatch({ + acp_connect(url) + TRUE + }, error = function(e) { + FALSE + }) + } + + collect_recommendation_selection <- function(recommendations, role_label, allow_multiple = FALSE) { + if (length(recommendations) == 0) return(integer(0)) + if (!isTRUE(interactive)) { + if (isTRUE(allow_multiple)) { + return(as.integer(vapply(recommendations, recommendation_cohort_id, integer(1)))) + } + return(as.integer(recommendation_cohort_id(recommendations[[1]]))) + } + + labels <- vapply(seq_along(recommendations), function(i) { + rec <- recommendations[[i]] + cohort_id <- recommendation_cohort_id(rec) + cohort_id_label <- if (is.na(cohort_id)) "?" 
else as.character(cohort_id) + sprintf("%s (ID %s)", recommendation_name(rec), cohort_id_label) + }, character(1)) + picks <- utils::select.list( + labels, + multiple = isTRUE(allow_multiple), + title = sprintf("Select %s phenotype%s", tolower(role_label), if (isTRUE(allow_multiple)) "s" else "") + ) + if (!length(picks) || !any(nzchar(picks))) return(integer(0)) + selected_ids <- vapply(picks, function(label) { + idx <- which(labels == label)[1] + recommendation_cohort_id(recommendations[[idx]]) + }, numeric(1)) + as.integer(selected_ids[!is.na(selected_ids)]) + } + + run_role_recommendation <- function(role_label, + statement, + output_path, + top_k, + max_results, + candidate_limit, + allow_multiple = FALSE, + preferred_selected_ids = NULL, + preferred_selection_source = "manual_input", + cached_selected_ids = NULL, + selected_cache_label = NULL, + selected_cache_dir = NULL, + cohort_method_cache = NULL, + incidence_cache = NULL) { + role_key <- tolower(role_label) + preferred_selected_ids <- normalize_selected_ids( + preferred_selected_ids, + sprintf("%s cohort ID%s", role_label, if (isTRUE(allow_multiple)) "s" else ""), + allow_multiple = allow_multiple + ) + if (length(preferred_selected_ids) > 0) { + return(list( + selected_ids = preferred_selected_ids, + selection_source = preferred_selection_source %||% "manual_input", + recommendation_path = json_string_or_null(if (file.exists(output_path)) output_path else NULL), + recommendation_source = "not_run", + used_cached_recommendation = FALSE, + used_cached_selection = FALSE, + used_window2 = FALSE, + used_advice = FALSE, + statement = statement + )) + } + selected_cache_ok <- !is.null(cohort_method_cache$selection$selected_ids) && + length(cohort_method_cache$selection$selected_ids) > 0 && + !is.null(cohort_method_cache$selection$cache_dir) && + dir.exists(cohort_method_cache$selection$cache_dir) + if (isTRUE(selected_cache_ok)) { + cached_selected_ids <- 
as.integer(unique(cohort_method_cache$selection$selected_ids)) + if (maybe_use_cache( + cohort_method_cache$selection$cache_dir, + cache_label_with_selection( + selected_cache_label %||% sprintf("%s cohort selection", role_key), + cached_selected_ids, + catalog_df + ) + )) { + return(list( + selected_ids = cached_selected_ids, + selection_source = "cohort_method_cached_selected_cohort", + recommendation_path = json_string_or_null(if (file.exists(output_path)) output_path else NULL), + recommendation_source = if (file.exists(output_path)) "cached_recommendation" else "cached_selected_cohort_only", + used_cached_recommendation = FALSE, + used_cached_selection = TRUE, + used_window2 = FALSE, + used_advice = FALSE, + statement = statement + )) + } + } + incidence_cache_ok <- !is.null(incidence_cache$selection$selected_ids) && + length(incidence_cache$selection$selected_ids) > 0 && + !is.null(incidence_cache$selection$cache_dir) && + dir.exists(incidence_cache$selection$cache_dir) + if (!isTRUE(selected_cache_ok) && isTRUE(incidence_cache_ok)) { + incidence_selected_ids <- as.integer(unique(incidence_cache$selection$selected_ids)) + if (maybe_use_cache( + incidence_cache$selection$cache_dir, + cache_label_with_selection( + incidence_cache$selection$label %||% sprintf("incidence %s cohort selection", role_key), + incidence_selected_ids, + catalog_df + ) + )) { + return(list( + selected_ids = incidence_selected_ids, + selection_source = "incidence_cached_selected_cohort", + recommendation_path = json_string_or_null(if (file.exists(output_path)) output_path else NULL), + recommendation_source = "incidence_cached_selected_cohort_only", + used_cached_recommendation = FALSE, + used_cached_selection = TRUE, + used_window2 = FALSE, + used_advice = FALSE, + statement = statement + )) + } + } + + recommendation_response <- NULL + recommendation_path <- output_path + used_cached_recommendation <- FALSE + used_window2 <- FALSE + used_advice <- FALSE + + if 
(maybe_use_cache(output_path, sprintf("%s recommendations", role_key))) { + recommendation_response <- read_json(output_path) + used_cached_recommendation <- TRUE + } else if (ensure_acp_ready(acpUrl)) { + body <- list( + study_intent = statement, + top_k = top_k, + max_results = max_results, + candidate_limit = candidate_limit + ) + message(sprintf("Calling ACP flow: phenotype_recommendation (%s)", role_key)) + recommendation_response <- tryCatch( + .acp_post("/flows/phenotype_recommendation", body), + error = function(e) { + list(status = "error", error = conditionMessage(e)) + } + ) + write_json(recommendation_response, output_path) + } + + recommendations_core <- recommendation_response$recommendations %||% recommendation_response + recommendations <- recommendations_core$phenotype_recommendations %||% list() + + if (isTRUE(interactive) && length(recommendations) > 0) { + cat(sprintf("\n== %s Phenotype Recommendations ==\n", role_label)) + for (i in seq_along(recommendations)) { + rec <- recommendations[[i]] + cohort_id <- recommendation_cohort_id(rec) + cohort_id_label <- if (is.na(cohort_id)) "?" else as.character(cohort_id) + cat(sprintf("%d. 
%s (ID %s)\n", i, recommendation_name(rec), cohort_id_label)) + if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) + } + ok_any <- prompt_yesno(sprintf("Are any of these acceptable for the %s?", role_key), default = TRUE) + if (!ok_any && ensure_acp_ready(acpUrl)) { + widen <- prompt_yesno("Widen candidate pool and try again?", default = TRUE) + if (isTRUE(widen)) { + used_window2 <- TRUE + recommendation_path <- file.path(dirname(output_path), sprintf("%s_window2.json", tools::file_path_sans_ext(basename(output_path)))) + body <- list( + study_intent = statement, + top_k = top_k, + max_results = max_results, + candidate_limit = candidate_limit, + candidate_offset = candidate_limit + ) + message(sprintf("Calling ACP flow: phenotype_recommendation (%s window 2)", role_key)) + recommendation_response <- tryCatch( + .acp_post("/flows/phenotype_recommendation", body), + error = function(e) { + list(status = "error", error = conditionMessage(e)) + } + ) + write_json(recommendation_response, recommendation_path) + recommendations_core <- recommendation_response$recommendations %||% recommendation_response + recommendations <- recommendations_core$phenotype_recommendations %||% list() + cat(sprintf("\n== %s Phenotype Recommendations (window 2) ==\n", role_label)) + for (i in seq_along(recommendations)) { + rec <- recommendations[[i]] + cohort_id <- recommendation_cohort_id(rec) + cohort_id_label <- if (is.na(cohort_id)) "?" else as.character(cohort_id) + cat(sprintf("%d. 
%s (ID %s)\n", i, recommendation_name(rec), cohort_id_label)) + if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) + } + ok_any <- prompt_yesno(sprintf("Are any of these acceptable for the %s?", role_key), default = TRUE) + } + if (!ok_any) { + used_advice <- TRUE + message(sprintf("Calling ACP flow: phenotype_recommendation_advice (%s)", role_key)) + advice <- tryCatch( + .acp_post("/flows/phenotype_recommendation_advice", list(study_intent = statement)), + error = function(e) { + list(status = "error", error = conditionMessage(e)) + } + ) + advice_core <- advice$advice %||% advice + cat("\n== Advisory guidance ==\n") + cat(advice_core$advice %||% "", "\n") + if (length(advice_core$next_steps %||% list()) > 0) { + cat("Next steps:\n") + for (step in advice_core$next_steps) cat(sprintf(" - %s\n", step)) + } + if (length(advice_core$questions %||% list()) > 0) { + cat("Questions to clarify:\n") + for (q in advice_core$questions) cat(sprintf(" - %s\n", q)) + } + checkpoint_label <- checkpoint_label_for_role_advice(role_label) + mark_checkpoint(checkpoint_label, list( + role_label = role_label, + role_key = role_key, + recommendations_path = recommendation_path, + statement = statement + )) + cat("\nHint: rerun with resume=TRUE after updating phenotypes to continue.\n") + stop(sprintf( + "Stopping after %s advice. 
Resume with resume=TRUE once phenotypes are updated.", + role_key + )) + } + } + } + + selected_ids <- collect_recommendation_selection(recommendations, role_label, allow_multiple = allow_multiple) + selected_ids <- as.integer(unique(selected_ids[!is.na(selected_ids)])) + + list( + selected_ids = selected_ids, + selection_source = if (length(selected_ids) > 0) "recommendation" else "none", + recommendation_path = json_string_or_null(if (file.exists(recommendation_path)) recommendation_path else NULL), + recommendation_source = if (used_cached_recommendation) "cached_recommendation" else if (!is.null(recommendation_response)) "acp_flow" else "not_run", + used_cached_recommendation = isTRUE(used_cached_recommendation), + used_cached_selection = FALSE, + used_window2 = isTRUE(used_window2), + used_advice = isTRUE(used_advice), + statement = statement + ) + } + + copy_cohort_json_multi <- function(source_id, dest_id, dest_dirs, index_def_dir) { + src <- file.path(index_def_dir, sprintf("%s.json", source_id)) + if (!file.exists(src)) stop(sprintf("Cohort JSON not found: %s", src)) + dests <- character(0) + for (dest_dir in dest_dirs) { + ensure_dir(dest_dir) + dest <- file.path(dest_dir, sprintf("%s.json", dest_id)) + file.copy(src, dest, overwrite = TRUE) + dests <- c(dests, dest) + } + dests + } + + run_role_improvements <- function(role_key, + role_label, + cohort_ids, + selected_role_dir, + patched_role_dir, + improvements_path, + role_statement = NULL) { + cohort_ids <- as.integer(cohort_ids) + cohort_ids <- cohort_ids[!is.na(cohort_ids)] + response_by_id <- list() + used_cache <- FALSE + applied_ids <- integer(0) + patched_paths <- character(0) + errors <- list() + flow_called <- FALSE + expected_meta <- list( + role = role_key, + cohort_ids = as.list(cohort_ids), + role_statement = role_statement %||% "", + study_intent = studyIntent + ) + + if (length(cohort_ids) == 0) { + return(list( + role = role_key, + status = "not_applicable", + cohort_ids = 
as.list(integer(0)), + improvements_path = improvements_path, + used_cache = FALSE, + flow_called = FALSE, + auto_apply = isTRUE(autoApplyImprovements), + applied = FALSE, + applied_ids = as.list(integer(0)), + patched_dir = patched_role_dir, + patched_paths = as.list(character(0)), + errors = list() + )) + } + + if (maybe_use_cache(improvements_path, sprintf("%s improvements", role_key))) { + cached_response <- read_json(improvements_path) + cached_meta <- cached_response$`_meta` %||% list() + cached_ids <- as.integer(unlist(cached_meta$cohort_ids %||% integer(0), use.names = FALSE)) + cache_valid <- identical(as.character(cached_meta$role %||% ""), role_key) && + identical(cached_ids, cohort_ids) && + identical(as.character(cached_meta$role_statement %||% ""), as.character(expected_meta$role_statement)) && + identical(as.character(cached_meta$study_intent %||% ""), as.character(studyIntent)) + if (isTRUE(cache_valid)) { + response_by_id <- cached_response[names(cached_response) != "_meta"] + used_cache <- TRUE + if (interactive) { + cat(sprintf("\nLoaded cached %s improvements from %s\n", role_key, improvements_path)) + } + } else if (interactive) { + cat(sprintf("\nIgnoring stale cached %s improvements at %s\n", role_key, improvements_path)) + } + } + if (!isTRUE(used_cache) && ensure_acp_ready(acpUrl)) { + for (cid in cohort_ids) { + cohort_path <- file.path(selected_role_dir, sprintf("%s.json", cid)) + cohort_obj <- read_json(cohort_path) + cohort_obj$id <- cid + body <- list( + protocol_text = studyIntent, + role = role_key, + role_statement = role_statement %||% "", + cohorts = list(cohort_obj) + ) + message(sprintf("Calling ACP flow: phenotype_improvements (%s cohort %s)", role_key, cid)) + flow_called <- TRUE + response_by_id[[as.character(cid)]] <- tryCatch( + .acp_post("/flows/phenotype_improvements", body), + error = function(e) { + err <- list( + status = "error", + error = conditionMessage(e), + flow = "phenotype_improvements", + role = role_key, + 
cohort_id = as.integer(cid) + ) + errors[[as.character(cid)]] <<- err + err + } + ) + } + response_with_meta <- c(list(`_meta` = expected_meta), response_by_id) + write_json(response_with_meta, improvements_path) + } else if (!isTRUE(used_cache)) { + errors[["acp"]] <- list( + status = "skipped", + error = "ACP bridge unavailable, ACP helpers not loaded, or ACP not connected.", + flow = "phenotype_improvements", + role = role_key + ) + write_json(c(list(`_meta` = expected_meta), response_by_id), improvements_path) + } + + for (cid in names(response_by_id)) { + if (identical(cid, "_meta")) next + resp <- response_by_id[[cid]] + core <- resp$full_result %||% resp + items <- core$phenotype_improvements %||% list() + if (interactive) { + cat(sprintf("\n== Improvements for %s cohort %s ==\n", role_key, cid)) + for (item in items) { + cat(sprintf("- %s\n", item$summary %||% "(no summary)")) + if (!is.null(item$actions)) { + for (act in item$actions) { + cat(sprintf(" action: %s %s\n", act$type %||% "set", act$path %||% "")) + } + } + } + if (length(items) == 0) { + cat(" No improvements returned for this cohort.\n") + } + } + if (length(items) == 0) next + + should_apply <- FALSE + if (interactive) { + should_apply <- prompt_yesno(sprintf("Apply improvements for %s cohort %s now?", role_key, cid), default = FALSE) + } else { + should_apply <- isTRUE(autoApplyImprovements) + } + if (!isTRUE(should_apply)) next + + cohort_path <- file.path(selected_role_dir, sprintf("%s.json", cid)) + cohort_obj <- read_json(cohort_path) + mutation_count <- 0L + for (item in items) { + if (is.null(item$actions)) next + for (act in item$actions) { + if (is_mutating_improvement_action(act)) { + cohort_obj <- apply_action(cohort_obj, act) + mutation_count <- mutation_count + 1L + } + } + } + if (mutation_count == 0L) next + ensure_dir(patched_role_dir) + ensure_dir(patched_dir) + out_path <- file.path(patched_role_dir, sprintf("%s.json", cid)) + write_json(cohort_obj, out_path) + 
file.copy(out_path, file.path(patched_dir, sprintf("%s.json", cid)), overwrite = TRUE) + applied_ids <- c(applied_ids, as.integer(cid)) + patched_paths <- c(patched_paths, out_path) + if (interactive) { + cat(sprintf("Patched %s cohort saved: %s\n", role_key, out_path)) + } + } + + status <- if (length(errors) > 0 && length(response_by_id) == 0) { + "skipped" + } else if (length(errors) > 0) { + "completed_with_errors" + } else if (length(response_by_id) > 0) { + "completed" + } else { + "not_run" + } + + list( + role = role_key, + label = role_label, + status = status, + cohort_ids = as.list(cohort_ids), + improvements_path = improvements_path, + used_cache = isTRUE(used_cache), + flow_called = isTRUE(flow_called), + auto_apply = isTRUE(autoApplyImprovements), + applied = length(applied_ids) > 0, + applied_ids = as.list(as.integer(unique(applied_ids))), + patched_dir = patched_role_dir, + patched_paths = as.list(patched_paths), + errors = errors + ) + } + + skipped_role_improvements <- function(role_key, + role_label, + cohort_ids, + patched_role_dir, + improvements_path, + reason = "user_skipped") { + cohort_ids <- as.integer(cohort_ids) + cohort_ids <- cohort_ids[!is.na(cohort_ids)] + list( + role = role_key, + label = role_label, + status = "not_run", + reason = reason, + cohort_ids = as.list(cohort_ids), + improvements_path = improvements_path, + used_cache = FALSE, + flow_called = FALSE, + auto_apply = isTRUE(autoApplyImprovements), + applied = FALSE, + applied_ids = as.list(integer(0)), + patched_dir = patched_role_dir, + patched_paths = as.list(character(0)), + errors = list() + ) + } + + run_role_improvement_gate <- function(role_key, + role_label, + cohort_ids, + selected_role_dir, + patched_role_dir, + improvements_path, + role_statement = NULL) { + do_improvements <- TRUE + if (interactive) { + do_improvements <- prompt_yesno( + sprintf("Continue to %s phenotype improvements?", role_key), + default = TRUE + ) + if (isTRUE(do_improvements)) { + 
cat(sprintf("\n== %s phenotype improvements ==\n", role_label)) + } + } + result <- if (isTRUE(do_improvements)) { + run_role_improvements( + role_key = role_key, + role_label = role_label, + cohort_ids = cohort_ids, + selected_role_dir = selected_role_dir, + patched_role_dir = patched_role_dir, + improvements_path = improvements_path, + role_statement = role_statement + ) + } else { + skipped_role_improvements( + role_key = role_key, + role_label = role_label, + cohort_ids = cohort_ids, + patched_role_dir = patched_role_dir, + improvements_path = improvements_path + ) + } + result$prompt_choice <- isTRUE(do_improvements) + result + } + + clear_json_files <- function(dir_path) { + if (!dir.exists(dir_path)) return(invisible(FALSE)) + files <- list.files(dir_path, pattern = "\\.(json)$", full.names = TRUE) + if (length(files) > 0) unlink(files, force = TRUE) + invisible(TRUE) + } + clear_sql_files <- function(dir_path) { + sql_dir <- file.path(dir_path, "sql") + if (!dir.exists(sql_dir)) return(invisible(FALSE)) + files <- list.files(sql_dir, pattern = "\\.(sql)$", full.names = TRUE) + if (length(files) > 0) unlink(files, force = TRUE) + invisible(TRUE) + } + clear_patched_role_outputs <- function() { + clear_json_files(patched_dir) + clear_json_files(patched_target_dir) + clear_json_files(patched_comparator_dir) + clear_json_files(patched_outcome_dir) + clear_sql_files(patched_dir) + clear_sql_files(patched_target_dir) + clear_sql_files(patched_comparator_dir) + clear_sql_files(patched_outcome_dir) + } + + write_lines <- function(path, lines) { + writeLines(lines, con = path, useBytes = TRUE) + } + + assert_cohort_json_exists <- function(source_id, index_def_dir, label) { + src <- file.path(index_def_dir, sprintf("%s.json", source_id)) + if (!file.exists(src)) { + stop(sprintf("%s cohort JSON not found: %s", label, src)) + } + invisible(src) + } + + cohort_json_exists <- function(source_id, index_def_dir) { + src <- file.path(index_def_dir, sprintf("%s.json", 
source_id)) + file.exists(src) + } + + validate_positive_integer <- function(value, label) { + if (length(value) != 1 || is.na(value) || !is.finite(value) || value <= 0) { + stop(sprintf("%s must be a positive integer.", label)) + } + as.integer(value) + } + + json_int_or_null <- function(value) { + if (is.null(value)) return(NA_integer_) + as.integer(value) + } + + json_string_or_null <- function(value) { + if (is.null(value)) return(NA_character_) + as.character(value) + } + + deep_merge <- function(defaults, overrides) { + if (is.null(overrides)) return(defaults) + for (name in names(overrides)) { + override_value <- overrides[[name]] + default_value <- defaults[[name]] + if (is.list(default_value) && is.list(override_value) && !is.data.frame(override_value)) { + defaults[[name]] <- deep_merge(default_value, override_value) + } else if (!is.null(override_value)) { + defaults[[name]] <- override_value + } + } + defaults + } + + validate_choice <- function(value, choices, label) { + if (length(value) != 1 || is.na(value) || !value %in% choices) { + stop(sprintf("%s must be one of: %s", label, paste(choices, collapse = ", "))) + } + as.character(value) + } + + validate_integer_value <- function(value, label, min_value = NULL) { + parsed <- suppressWarnings(as.integer(value)) + if (length(parsed) != 1 || is.na(parsed) || !is.finite(parsed)) { + stop(sprintf("%s must be an integer.", label)) + } + if (!is.null(min_value) && parsed < min_value) { + stop(sprintf("%s must be >= %s.", label, min_value)) + } + as.integer(parsed) + } + + validate_numeric_value <- function(value, label, min_value = NULL) { + parsed <- suppressWarnings(as.numeric(value)) + if (length(parsed) != 1 || is.na(parsed) || !is.finite(parsed)) { + stop(sprintf("%s must be numeric.", label)) + } + if (!is.null(min_value) && parsed < min_value) { + stop(sprintf("%s must be >= %s.", label, min_value)) + } + parsed + } + + validate_logical_value <- function(value, label) { + if (length(value) != 1 || 
is.na(value) || !is.logical(value)) { + stop(sprintf("%s must be TRUE or FALSE.", label)) + } + isTRUE(value) + } + + normalize_analytic_settings <- function(settings) { + validate_date_or_blank <- function(value, label) { + .studyAgentDateStringOrEmpty(value, label) + } + + normalize_optional_concept_set_id <- function(value, label) { + if (is.null(value) || length(value) == 0) return(NA_integer_) + if (length(value) == 1 && is.na(value)) return(NA_integer_) + if (is.character(value) && length(value) == 1) { + normalized <- tolower(trimws(value)) + if (!nzchar(normalized) || normalized %in% c("na", "null", "none")) return(NA_integer_) + } + parsed <- suppressWarnings(as.integer(value)) + if (length(parsed) != 1 || is.na(parsed) || !is.finite(parsed)) { + stop(sprintf("%s must be a positive integer or blank.", label)) + } + if (parsed == 0L) return(NA_integer_) + validate_positive_integer(parsed, label) + } + + allowed_sections <- c( + "study_population", + "covariate_settings", + "time_at_risk", + "propensity_score_adjustment", + "outcome_model" + ) + section_aliases <- c(covariates = "covariate_settings") + + profile_name <- trimws(as.character(settings$profile_name %||% "")) + if (!nzchar(profile_name)) { + stop("analytic_settings.profile_name must be a non-empty string.") + } + + customized_sections <- as.character(unlist(settings$customized_sections %||% character(0), use.names = FALSE)) + aliased_sections <- unname(section_aliases[customized_sections]) + customized_sections <- ifelse(is.na(aliased_sections), customized_sections, aliased_sections) + customized_sections <- unique(customized_sections[nzchar(customized_sections)]) + invalid_sections <- setdiff(customized_sections, allowed_sections) + if (length(invalid_sections) > 0) { + stop(sprintf( + "analytic_settings.customized_sections contains unsupported values: %s", + paste(invalid_sections, collapse = ", ") + )) + } + + settings$profile_name <- profile_name + settings$source <- "manual_shell" + 
settings$customized_sections <- customized_sections + settings$get_db_cohort_method_data$studyStartDate <- validate_date_or_blank( + settings$get_db_cohort_method_data$studyStartDate, + "analytic_settings.get_db_cohort_method_data.studyStartDate" + ) + settings$get_db_cohort_method_data$studyEndDate <- validate_date_or_blank( + settings$get_db_cohort_method_data$studyEndDate, + "analytic_settings.get_db_cohort_method_data.studyEndDate" + ) + settings$get_db_cohort_method_data$firstExposureOnly <- validate_logical_value( + settings$get_db_cohort_method_data$firstExposureOnly, + "analytic_settings.get_db_cohort_method_data.firstExposureOnly" + ) + settings$get_db_cohort_method_data$washoutPeriod <- validate_integer_value( + settings$get_db_cohort_method_data$washoutPeriod, + "analytic_settings.get_db_cohort_method_data.washoutPeriod", + min_value = 0L + ) + settings$get_db_cohort_method_data$restrictToCommonPeriod <- validate_logical_value( + settings$get_db_cohort_method_data$restrictToCommonPeriod, + "analytic_settings.get_db_cohort_method_data.restrictToCommonPeriod" + ) + settings$get_db_cohort_method_data$removeDuplicateSubjects <- validate_choice( + settings$get_db_cohort_method_data$removeDuplicateSubjects, + c("keep all", "keep first", "remove all", "keep first, truncate to second"), + "analytic_settings.get_db_cohort_method_data.removeDuplicateSubjects" + ) + settings$create_study_population$removeDuplicateSubjects <- validate_choice( + settings$create_study_population$removeDuplicateSubjects, + c("keep all", "keep first", "remove all"), + "analytic_settings.create_study_population.removeDuplicateSubjects" + ) + settings$create_study_population$maxCohortSize <- validate_integer_value( + settings$create_study_population$maxCohortSize, + "analytic_settings.create_study_population.maxCohortSize", + min_value = 0L + ) + settings$create_study_population$removeSubjectsWithPriorOutcome <- validate_logical_value( + 
settings$create_study_population$removeSubjectsWithPriorOutcome, + "analytic_settings.create_study_population.removeSubjectsWithPriorOutcome" + ) + settings$create_study_population$priorOutcomeLookback <- validate_integer_value( + settings$create_study_population$priorOutcomeLookback, + "analytic_settings.create_study_population.priorOutcomeLookback", + min_value = 0L + ) + settings$create_study_population$minDaysAtRisk <- validate_integer_value( + settings$create_study_population$minDaysAtRisk, + "analytic_settings.create_study_population.minDaysAtRisk", + min_value = 0L + ) + settings$create_study_population$riskWindowStart <- validate_integer_value( + settings$create_study_population$riskWindowStart, + "analytic_settings.create_study_population.riskWindowStart" + ) + settings$create_study_population$startAnchor <- validate_choice( + settings$create_study_population$startAnchor, + c("cohort start", "cohort end"), + "analytic_settings.create_study_population.startAnchor" + ) + settings$create_study_population$riskWindowEnd <- validate_integer_value( + settings$create_study_population$riskWindowEnd, + "analytic_settings.create_study_population.riskWindowEnd" + ) + settings$create_study_population$endAnchor <- validate_choice( + settings$create_study_population$endAnchor, + c("cohort start", "cohort end"), + "analytic_settings.create_study_population.endAnchor" + ) + settings$create_study_population$censorAtNewRiskWindow <- validate_logical_value( + settings$create_study_population$censorAtNewRiskWindow, + "analytic_settings.create_study_population.censorAtNewRiskWindow" + ) + settings$create_ps$estimator <- validate_choice( + settings$create_ps$estimator, + c("att", "ate"), + "analytic_settings.create_ps.estimator" + ) + settings$create_ps$maxCohortSizeForFitting <- validate_integer_value( + settings$create_ps$maxCohortSizeForFitting, + "analytic_settings.create_ps.maxCohortSizeForFitting", + min_value = 0L + ) + settings$create_ps$errorOnHighCorrelation <- 
validate_logical_value( + settings$create_ps$errorOnHighCorrelation, + "analytic_settings.create_ps.errorOnHighCorrelation" + ) + settings$create_ps$useRegularization <- validate_logical_value( + settings$create_ps$useRegularization, + "analytic_settings.create_ps.useRegularization" + ) + settings$match_on_ps$caliper <- validate_numeric_value( + settings$match_on_ps$caliper, + "analytic_settings.match_on_ps.caliper", + min_value = 0 + ) + settings$match_on_ps$caliperScale <- validate_choice( + settings$match_on_ps$caliperScale, + c("propensity score", "standardized", "standardized logit"), + "analytic_settings.match_on_ps.caliperScale" + ) + settings$match_on_ps$maxRatio <- validate_integer_value( + settings$match_on_ps$maxRatio, + "analytic_settings.match_on_ps.maxRatio", + min_value = 0L + ) + settings$ps_adjustment$strategy <- validate_choice( + settings$ps_adjustment$strategy, + c("match_on_ps", "stratify_by_ps", "none"), + "analytic_settings.ps_adjustment.strategy" + ) + settings$ps_adjustment$trimmingStrategy <- validate_choice( + settings$ps_adjustment$trimmingStrategy, + c("none", "by_percent", "by_equipoise"), + "analytic_settings.ps_adjustment.trimmingStrategy" + ) + settings$ps_adjustment$trimmingPercent <- validate_numeric_value( + settings$ps_adjustment$trimmingPercent, + "analytic_settings.ps_adjustment.trimmingPercent", + min_value = 0 + ) + if (settings$ps_adjustment$trimmingPercent >= 50) { + stop("analytic_settings.ps_adjustment.trimmingPercent must be < 50.") + } + settings$ps_adjustment$equipoiseLowerBound <- validate_numeric_value( + settings$ps_adjustment$equipoiseLowerBound, + "analytic_settings.ps_adjustment.equipoiseLowerBound", + min_value = 0 + ) + settings$ps_adjustment$equipoiseUpperBound <- validate_numeric_value( + settings$ps_adjustment$equipoiseUpperBound, + "analytic_settings.ps_adjustment.equipoiseUpperBound", + min_value = 0 + ) + if (settings$ps_adjustment$equipoiseLowerBound >= settings$ps_adjustment$equipoiseUpperBound || + 
settings$ps_adjustment$equipoiseUpperBound > 1) { + stop("analytic_settings.ps_adjustment equipoise bounds must satisfy 0 <= lower < upper <= 1.") + } + settings$stratify_by_ps$numberOfStrata <- validate_integer_value( + settings$stratify_by_ps$numberOfStrata, + "analytic_settings.stratify_by_ps.numberOfStrata", + min_value = 1L + ) + settings$stratify_by_ps$baseSelection <- validate_choice( + settings$stratify_by_ps$baseSelection, + c("all", "target", "comparator"), + "analytic_settings.stratify_by_ps.baseSelection" + ) + settings$fit_outcome_model$modelType <- validate_choice( + settings$fit_outcome_model$modelType, + c("cox", "logistic", "poisson"), + "analytic_settings.fit_outcome_model.modelType" + ) + settings$fit_outcome_model$stratified <- validate_logical_value( + settings$fit_outcome_model$stratified, + "analytic_settings.fit_outcome_model.stratified" + ) + settings$fit_outcome_model$useCovariates <- validate_logical_value( + settings$fit_outcome_model$useCovariates, + "analytic_settings.fit_outcome_model.useCovariates" + ) + settings$fit_outcome_model$inversePtWeighting <- validate_logical_value( + settings$fit_outcome_model$inversePtWeighting, + "analytic_settings.fit_outcome_model.inversePtWeighting" + ) + settings$fit_outcome_model$useRegularization <- validate_logical_value( + settings$fit_outcome_model$useRegularization, + "analytic_settings.fit_outcome_model.useRegularization" + ) + settings$covariate_concept_sets$enabled <- validate_logical_value( + settings$covariate_concept_sets$enabled, + "analytic_settings.covariate_concept_sets.enabled" + ) + settings$covariate_concept_sets$include_all_concepts <- validate_logical_value( + settings$covariate_concept_sets$include_all_concepts, + "analytic_settings.covariate_concept_sets.include_all_concepts" + ) + + include_id <- settings$covariate_concept_sets$include_concept_set_id + exclude_id <- settings$covariate_concept_sets$exclude_concept_set_id + settings$covariate_concept_sets$include_concept_set_id 
<- normalize_optional_concept_set_id( + include_id, + "analytic_settings.covariate_concept_sets.include_concept_set_id" + ) + settings$covariate_concept_sets$exclude_concept_set_id <- normalize_optional_concept_set_id( + exclude_id, + "analytic_settings.covariate_concept_sets.exclude_concept_set_id" + ) + + settings + }
+
+  # Prompt for a free-text setting.
+  #
+  # value:   current value (may be NULL).
+  # prompt:  text shown to the user.
+  # default: fallback used when `value` is NULL.
+  #
+  # Returns the current value when the enclosing `interactive` flag is not
+  # TRUE or when the user just presses Enter. Otherwise returns the entered
+  # text with surrounding whitespace removed, as the other collect_* helpers do.
+  collect_text_value <- function(value, prompt, default = "") {
+    current <- value %||% default
+    if (!isTRUE(interactive)) return(current)
+    entered <- trimws(readline(sprintf("%s [%s]: ", prompt, current)))
+    if (nzchar(entered)) entered else current
+  }
+
+  # Prompt for one value out of a fixed set of choices.
+  #
+  # value:   current selection (may be NULL or not among `choices`).
+  # label:   heading used when `prompt` is NULL.
+  # choices: allowed values; the first one is the last-resort default.
+  # prompt:  optional heading printed above the numbered options.
+  # default: optional default used when `value` is NULL or invalid.
+  #
+  # Returns the current selection when not interactive or on empty input.
+  # The user may answer with an option number or with the exact choice text.
+  collect_choice_value <- function(value, label, choices, prompt = NULL, default = NULL) {
+    current <- value %||% default %||% choices[[1]]
+    if (!current %in% choices) current <- default %||% choices[[1]]
+    if (!isTRUE(interactive)) return(current)
+
+    cat(sprintf("%s\n", prompt %||% label))
+    for (i in seq_along(choices)) {
+      marker <- if (identical(choices[[i]], current)) " [default]" else ""
+      cat(sprintf(" %s. %s%s\n", i, choices[[i]], marker))
+    }
+
+    repeat {
+      entered <- trimws(readline(sprintf("Select option [%s]: ", match(current, choices))))
+      if (!nzchar(entered)) return(current)
+      # Only a plain run of digits counts as an option number. as.integer()
+      # on its own would truncate input such as "1.9" and pick option 1.
+      if (grepl("^[0-9]+$", entered)) {
+        option_idx <- suppressWarnings(as.integer(entered))
+        if (!is.na(option_idx) && option_idx >= 1 && option_idx <= length(choices)) {
+          return(choices[[option_idx]])
+        }
+      }
+      if (entered %in% choices) return(entered)
+      cat(sprintf("Please enter one of: %s\n", paste(seq_along(choices), collapse = ", ")))
+    }
+  }
+
+  # Prompt for an integer setting.
+  #
+  # value/default: current value and its fallback; the result is passed
+  #                through validate_integer_value() before anything else.
+  # label:         name used in validation and error messages.
+  # prompt:        text shown to the user.
+  # min_value:     optional inclusive lower bound.
+  #
+  # Returns the validated current value when not interactive or on empty
+  # input; otherwise keeps asking until a whole number within bounds is given.
+  collect_integer_value <- function(value, label, prompt, default = NULL, min_value = NULL) {
+    current <- value %||% default
+    current <- validate_integer_value(current, label, min_value = min_value)
+    if (!isTRUE(interactive)) return(current)
+
+    repeat {
+      entered <- trimws(readline(sprintf("%s [%s]: ", prompt, current)))
+      if (!nzchar(entered)) return(current)
+      # Parse as numeric first: as.integer("3.7") silently yields 3, which
+      # would accept a non-integer answer. Require a finite whole number that
+      # fits in R's integer range instead.
+      parsed <- suppressWarnings(as.numeric(entered))
+      is_whole <- is.finite(parsed) &&
+        parsed == trunc(parsed) &&
+        abs(parsed) <= .Machine$integer.max
+      if (is_whole && (is.null(min_value) || parsed >= min_value)) {
+        return(as.integer(parsed))
+      }
+      if (is.null(min_value)) {
+        cat(sprintf("%s must be an integer.\n", label))
+      } else {
+        cat(sprintf("%s must be an integer >= %s.\n", label, min_value))
+      }
+    }
+  }
+
+  # Prompt for a numeric setting.
+  #
+  # Same contract as collect_integer_value(), except that the current value
+  # is passed through validate_numeric_value() and any number that
+  # as.numeric() can parse (and that respects `min_value`) is accepted.
+  collect_numeric_value <- function(value, label, prompt, default = NULL, min_value = NULL) {
+    current <- value %||% default
+    current <- validate_numeric_value(current, label, min_value = min_value)
+    if (!isTRUE(interactive)) return(current)
+
+    repeat {
+      # Show the default without scientific notation so it can be retyped as is.
+      shown <- format(current, trim = TRUE, scientific = FALSE)
+      entered <- trimws(readline(sprintf("%s [%s]: ", prompt, shown)))
+      if (!nzchar(entered)) return(current)
+      parsed <- suppressWarnings(as.numeric(entered))
+      if (!is.na(parsed) && (is.null(min_value) || parsed >= min_value)) {
+        return(parsed)
+      }
+      if (is.null(min_value)) {
+        cat(sprintf("%s must be numeric.\n", label))
+      } else {
+        cat(sprintf("%s must be numeric >= %s.\n", label, min_value))
+      }
+    }
+  }
+
+ review_analytic_settings_interactively <- function(settings) { + if (!isTRUE(interactive))
return(settings)
+    section_paths <- .studyAgentAnalyticSettingsSectionPaths()
+    # TRUE while the review file holds user edits that failed validation.
+    # In that state the file is not regenerated, so the user can correct the
+    # offending value instead of redoing every edit from scratch.
+    keep_review_file <- FALSE
+    repeat {
+      # Show the last valid settings and let the user accept them as they are.
+      .studyAgentPrintFinalSettingsSummary(settings, section_paths)
+      if (prompt_yesno_strict("Use these analytic settings?", default = TRUE)) {
+        return(settings)
+      }
+
+      review_path <- file.path(output_dir, "analytic_settings_review.txt")
+      if (!isTRUE(keep_review_file) || !file.exists(review_path)) {
+        .studyAgentWriteAnalyticSettingsReviewFile(settings, section_paths, review_path)
+      }
+      cat(sprintf("\nOpening analytic settings review file:\n %s\n", review_path))
+      cat("Edit values after ':', save the file, close the editor, then return here.\n")
+      tryCatch(
+        utils::file.edit(review_path),
+        error = function(e) {
+          cat(sprintf("Could not open editor automatically: %s\n", conditionMessage(e)))
+          cat("Open the file manually, edit it, save it, then return here.\n")
+        }
+      )
+
+      # Wait until the user confirms the file is saved; 'r' reopens the editor.
+      repeat {
+        entered <- tolower(trimws(readline("Press Enter after saving, or type 'r' to reopen the file: ")))
+        if (identical(entered, "r")) {
+          tryCatch(
+            utils::file.edit(review_path),
+            error = function(e) cat(sprintf("Could not open editor: %s\n", conditionMessage(e)))
+          )
+          next
+        }
+        break
+      }
+
+      # Read the edited file back and validate it; errors from either step are
+      # captured as a condition object rather than aborting the shell.
+      parsed <- tryCatch(
+        normalize_analytic_settings(.studyAgentReadAnalyticSettingsReviewFile(review_path, settings)),
+        error = function(e) e
+      )
+      if (inherits(parsed, "error")) {
+        cat(sprintf("\nAnalytic settings validation failed: %s\n", conditionMessage(parsed)))
+        cat("Please edit the review file and try again.\n")
+        keep_review_file <- TRUE
+        next
+      }
+      # Valid edits become the new settings; the next round starts from a
+      # freshly generated review file.
+      keep_review_file <- FALSE
+      settings <- parsed
+    }
+  }
+
+ flatten_named_values <- function(x, prefix = NULL) { + if (is.list(x) && !is.data.frame(x)) { + pieces <- unlist( + lapply(names(x), function(name) { + key <- if (is.null(prefix) || !nzchar(prefix)) name else paste(prefix, name, sep = ".") + flatten_named_values(x[[name]], key) + }), + recursive = FALSE, + use.names = FALSE + ) + return(pieces) + } + + value <- if (length(x) == 0 || all(is.na(x))) { + "null" + } else if (length(x) > 1) { + paste(as.character(x), collapse = ", ") + } else { + as.character(x) + } + +
stats::setNames(list(value), prefix %||% "value") + } + + shell_settings_from_acp_recommendation <- function(recommendation, defaults_snapshot) { + settings <- defaults_snapshot %||% list() + settings$profile_name <- as.character( + recommendation$profile_name %||% settings$profile_name %||% "Recommended from ACP" + ) + settings$source <- "acp_flow" + + study_population <- recommendation$study_population %||% list() + cohort_method_data_args <- study_population$cohortMethodDataArgs %||% list() + if (length(cohort_method_data_args) > 0) { + settings$get_db_cohort_method_data <- utils::modifyList( + settings$get_db_cohort_method_data %||% list(), + cohort_method_data_args + ) + } + + create_study_population <- study_population + create_study_population$cohortMethodDataArgs <- NULL + time_at_risk <- recommendation$time_at_risk %||% list() + if (length(create_study_population) > 0 || length(time_at_risk) > 0) { + settings$create_study_population <- utils::modifyList( + settings$create_study_population %||% list(), + c(create_study_population, time_at_risk) + ) + } + + ps <- recommendation$propensity_score_adjustment %||% list() + create_ps <- ps$createPsArgs %||% list() + if (length(create_ps) > 0) { + settings$create_ps <- utils::modifyList( + settings$create_ps %||% list(), + list( + maxCohortSizeForFitting = create_ps$maxCohortSizeForFitting %||% settings$create_ps$maxCohortSizeForFitting, + errorOnHighCorrelation = create_ps$errorOnHighCorrelation %||% settings$create_ps$errorOnHighCorrelation, + useRegularization = !is.null(create_ps$prior) + ) + ) + } + + trim_args <- ps$trimByPsArgs + match_args <- ps$matchOnPsArgs + stratify_args <- ps$stratifyByPsArgs + settings$ps_adjustment <- utils::modifyList( + settings$ps_adjustment %||% list(), + list( + strategy = if (!is.null(match_args)) { + "match_on_ps" + } else if (!is.null(stratify_args)) { + "stratify_by_ps" + } else { + "none" + }, + trimmingStrategy = if (is.null(trim_args)) { + "none" + } else if 
(!is.null(trim_args$equipoiseBounds)) { + "by_equipoise" + } else { + "by_percent" + }, + trimmingPercent = if (!is.null(trim_args$trimFraction)) { + as.numeric(trim_args$trimFraction) * 100 + } else { + settings$ps_adjustment$trimmingPercent %||% 5 + }, + equipoiseLowerBound = if (!is.null(trim_args$equipoiseBounds) && length(trim_args$equipoiseBounds) >= 1) { + as.numeric(trim_args$equipoiseBounds[[1]]) + } else { + settings$ps_adjustment$equipoiseLowerBound %||% 0.25 + }, + equipoiseUpperBound = if (!is.null(trim_args$equipoiseBounds) && length(trim_args$equipoiseBounds) >= 2) { + as.numeric(trim_args$equipoiseBounds[[2]]) + } else { + settings$ps_adjustment$equipoiseUpperBound %||% 0.75 + } + ) + ) + if (!is.null(match_args)) { + settings$match_on_ps <- utils::modifyList(settings$match_on_ps %||% list(), match_args) + } + if (!is.null(stratify_args)) { + settings$stratify_by_ps <- utils::modifyList(settings$stratify_by_ps %||% list(), stratify_args) + } + + outcome_model <- recommendation$outcome_model %||% list() + if (length(outcome_model) > 0) { + settings$fit_outcome_model <- utils::modifyList( + settings$fit_outcome_model %||% list(), + list( + modelType = outcome_model$modelType %||% settings$fit_outcome_model$modelType, + stratified = outcome_model$stratified %||% settings$fit_outcome_model$stratified, + useCovariates = outcome_model$useCovariates %||% settings$fit_outcome_model$useCovariates, + inversePtWeighting = outcome_model$inversePtWeighting %||% settings$fit_outcome_model$inversePtWeighting, + useRegularization = !is.null(outcome_model$prior) + ) + ) + } + + settings + } + + print_analytic_settings_recommendation_preview <- function(acp_response, + recommendation, + recommendation_path = NULL, + acp_response_path = NULL) { + response <- acp_response$response %||% list() + diagnostics <- response$diagnostics %||% acp_response$diagnostics %||% list() + source <- as.character(acp_response$source %||% recommendation$source %||% "unknown") + 
wrapper_status <- as.character(acp_response$status %||% "unknown") + flow_status <- as.character(response$status %||% wrapper_status) + + acp_success <- identical(source, "acp_flow") && identical(flow_status, "ok") + + if (isTRUE(acp_success)) { + cat("\nAnalytic settings recommendation from ACP\n") + failed_sections <- diagnostics$failed_sections %||% list() + if (length(failed_sections) > 0) { + cat(sprintf(" - Backfilled sections: %s\n", paste(unlist(failed_sections), collapse = ", "))) + } + + rationales <- response$section_rationales %||% acp_response$section_rationales %||% list() + if (length(rationales) > 0) { + cat("\nRationales\n") + for (section in names(rationales)) { + entry <- rationales[[section]] + section_title <- .studyAgentAnalyticSettingsSectionTitles()[[section]] %||% section + cat(sprintf( + "[%s] confidence=%s\n%s\n", + section_title, + entry$confidence %||% "?", + entry$rationale %||% "" + )) + } + } + } else { + cat("\nCohort Method analytic settings recommendation could not be generated by ACP.\n") + cat("Using the current default analytic settings for now.\n") + reason <- diagnostics$reason %||% response$error %||% acp_response$error %||% NULL + if (!is.null(reason) && nzchar(as.character(reason))) { + cat(sprintf("Reason: %s\n", as.character(reason))) + } else { + message <- acp_response$message %||% acp_response$error %||% diagnostics$reason %||% NULL + if (!is.null(message) && nzchar(as.character(message))) { + cat(sprintf("Reason: %s\n", as.character(message))) + } + } + detail_paths <- c( + if (!is.null(acp_response_path) && nzchar(as.character(acp_response_path))) as.character(acp_response_path) else NULL, + if (!is.null(recommendation_path) && nzchar(as.character(recommendation_path))) as.character(recommendation_path) else NULL + ) + if (length(detail_paths) > 0) { + cat("Details saved to:\n") + for (path in detail_paths) { + cat(sprintf(" - %s\n", path)) + } + } + } + } + + build_dummy_analytic_settings_recommendation <- 
function(description, defaults_snapshot, input_method = "typed_text") { + list( + mode = "free_text", + input_method = input_method, + source = "manual_shell", + status = "dummy_generated", + profile_name = "Recommended from free-text description", + raw_description = description, + study_population = "TODO: derive study population settings from free-text description", + time_at_risk = "TODO: derive time-at-risk settings from free-text description", + propensity_score_adjustment = "TODO: derive propensity score adjustment settings from free-text description", + outcome_model = "TODO: derive outcome model settings from free-text description", + deferred_inputs = list( + function_argument_description = "implemented", + description_file_path = "implemented", + interactive_typed_description = "implemented" + ), + defaults_snapshot = defaults_snapshot + ) + } + + call_cohort_methods_specifications_recommendation <- function(acp_url, + body, + defaults_snapshot, + input_method = "typed_text") { + flow_name <- "cohort_methods_specifications_recommendation" + dummy_recommendation <- build_dummy_analytic_settings_recommendation( + description = body$analytic_settings_description %||% body$study_description %||% "", + defaults_snapshot = defaults_snapshot, + input_method = input_method + ) + + ensure_connected <- function(url) { + has_acp_state <- exists("acp_state", inherits = TRUE) + has_acp_connect <- exists("acp_connect", mode = "function", inherits = TRUE) + if (!has_acp_state || !has_acp_connect) return(FALSE) + if (!is.null(get("acp_state", inherits = TRUE)$url)) return(TRUE) + if (is.null(url) || !nzchar(trimws(url))) return(FALSE) + tryCatch({ + acp_connect(url) + TRUE + }, error = function(e) { + FALSE + }) + } + + use_acp <- ensure_connected(acp_url) + has_acp_post <- exists(".acp_post", mode = "function", inherits = TRUE) + if (!isTRUE(use_acp) || !has_acp_post) { + return(list( + flow = flow_name, + source = "stub_acp_placeholder", + status = "stub", + message = 
"ACP bridge unavailable, ACP helpers not loaded, or ACP not connected. Returning placeholder cohort methods specifications recommendation.", + request = body, + recommendation = dummy_recommendation + )) + } + + response <- tryCatch( + .acp_post(sprintf("/flows/%s", flow_name), body), + error = function(e) { + list( + flow = flow_name, + source = "stub_acp_placeholder", + status = "stub", + error = conditionMessage(e), + message = "ACP flow failed. Returning placeholder cohort methods specifications recommendation.", + request = body, + recommendation = dummy_recommendation + ) + } + ) + + if (is.list(response) && identical(response$source, "stub_acp_placeholder")) { + return(response) + } + + recommendation <- response$recommendation %||% + response$recommendations %||% + response$cohort_methods_specifications_recommendation %||% + dummy_recommendation + if (!is.list(recommendation)) recommendation <- dummy_recommendation + + list( + flow = flow_name, + source = "acp_flow", + status = "received", + request = body, + response = response, + recommendation = recommendation + ) + } + + study_base_dir <- "" + if (nzchar(studyAgentBaseDir)) { + study_base_dir <- normalizePath(studyAgentBaseDir, winslash = "/", mustWork = FALSE) + } + + if (!is.null(analyticSettingsDescription)) { + analyticSettingsDescription <- trimws(as.character(analyticSettingsDescription)) + if (!nzchar(analyticSettingsDescription)) analyticSettingsDescription <- NULL + } + if (!is.null(analyticSettingsDescriptionPath)) { + analyticSettingsDescriptionPath <- trimws(as.character(analyticSettingsDescriptionPath)) + if (!nzchar(analyticSettingsDescriptionPath)) analyticSettingsDescriptionPath <- NULL + } + + outputDir <- resolve_path(outputDir, study_base_dir) + outputDir <- normalizePath(outputDir, winslash = "/", mustWork = FALSE) + if (isTRUE(reset) && dir.exists(outputDir)) { + ok <- TRUE + if (isTRUE(interactive)) { + ok <- prompt_yesno(sprintf("Delete existing output directory %s?", outputDir), 
default = FALSE) + } + if (ok) unlink(outputDir, recursive = TRUE, force = TRUE) + }
+
+  # Resolve the working directories relative to the study base directory.
+  base_dir <- outputDir
+  incidence_base_dir <- resolve_path(incidenceOutputDir, study_base_dir)
+  incidence_base_dir <- normalizePath(incidence_base_dir, winslash = "/", mustWork = FALSE)
+  index_dir <- resolve_path(indexDir, study_base_dir)
+  index_dir <- normalizePath(index_dir, winslash = "/", mustWork = FALSE)
+  analytic_settings_description_path_resolved <- if (is.null(analyticSettingsDescriptionPath)) {
+    NULL
+  } else {
+    normalizePath(resolve_path(analyticSettingsDescriptionPath, study_base_dir), winslash = "/", mustWork = FALSE)
+  }
+  # Fallback for a relative indexDir with no base directory configured: look
+  # under ./OHDSI-Study-Agent in the current working directory.
+  if (!dir.exists(index_dir) && !is_absolute_path(indexDir) && !nzchar(studyAgentBaseDir)) {
+    alt <- file.path(getwd(), "OHDSI-Study-Agent", indexDir)
+    if (dir.exists(alt)) index_dir <- normalizePath(alt, winslash = "/", mustWork = FALSE)
+  }
+  index_def_dir <- file.path(index_dir, "definitions")
+  if (!dir.exists(index_def_dir)) stop(sprintf("Missing phenotype index definitions folder: %s", index_def_dir))
+  # Load the catalog only once index_dir is final. Loading it before the
+  # fallback above would read from the unresolved (possibly missing) directory.
+  catalog_df <- load_catalog(index_dir)
+
+  # Sub-directories of the shell's output tree.
+  output_dir <- file.path(base_dir, "outputs")
+  selected_dir <- file.path(base_dir, "selected-cohorts")
+  patched_dir <- file.path(base_dir, "patched-cohorts")
+  selected_target_dir <- file.path(base_dir, "selected-target-cohorts")
+  selected_comparator_dir <- file.path(base_dir, "selected-comparator-cohorts")
+  selected_outcome_dir <- file.path(base_dir, "selected-outcome-cohorts")
+  patched_target_dir <- file.path(base_dir, "patched-target-cohorts")
+  patched_comparator_dir <- file.path(base_dir, "patched-comparator-cohorts")
+  patched_outcome_dir <- file.path(base_dir, "patched-outcome-cohorts")
+  concept_sets_dir <- file.path(base_dir, "concept-sets")
+  keeper_dir <- file.path(base_dir, "keeper-case-review")
+  analysis_settings_dir <- file.path(base_dir, "analysis-settings")
+  scripts_dir <- file.path(base_dir, "scripts")
+  cm_results_dir <- file.path(base_dir, "cm-results") +
cm_diagnostics_dir <- file.path(base_dir, "cm-diagnostics") + cm_data_dir <- file.path(base_dir, "cm-data") + + dirs <- c( + output_dir, selected_dir, patched_dir, selected_target_dir, selected_comparator_dir, + selected_outcome_dir, patched_target_dir, patched_comparator_dir, patched_outcome_dir, + concept_sets_dir, + keeper_dir, analysis_settings_dir, scripts_dir, cm_results_dir, cm_diagnostics_dir, + cm_data_dir + ) + for (dir_path in dirs) ensure_dir(dir_path) + + manual_intent_path <- file.path(output_dir, "manual_intent.json") + manual_inputs_path <- file.path(output_dir, "manual_inputs.json") + cohort_methods_intent_split_path <- file.path(output_dir, "cohort_methods_intent_split.json") + cohort_roles_path <- file.path(output_dir, "cohort_roles.json") + cohort_id_map_path <- file.path(output_dir, "cohort_id_map.json") + incidence_cohort_id_map_path <- file.path(incidence_base_dir, "outputs", "cohort_id_map.json") + incidence_selected_target_dir <- file.path(incidence_base_dir, "selected-target-cohorts") + incidence_selected_outcome_dir <- file.path(incidence_base_dir, "selected-outcome-cohorts") + cm_comparisons_path <- file.path(output_dir, "cm_comparisons.json") + improvements_status_path <- file.path(output_dir, "improvements_status.json") + cm_evaluation_todo_path <- file.path(output_dir, "cm_evaluation_todo.json") + cm_defaults_path <- file.path(output_dir, "cm_analysis_defaults.json") + cm_analysis_json_path <- file.path(analysis_settings_dir, "cmAnalysis.json") + cm_analysis_template_path <- system.file("templates", "cmAnalysis_template.json", package = "OHDSIAssistant") + if (!nzchar(cm_analysis_template_path)) { + cm_analysis_template_path <- resolve_path("mcp_server/prompts/cohort_methods/cmAnalysis_template.json", study_base_dir) + cm_analysis_template_path <- normalizePath(cm_analysis_template_path, winslash = "/", mustWork = FALSE) + } + if (!file.exists(cm_analysis_template_path)) { + cm_analysis_template_path <- NA_character_ + } + 
cm_acp_specifications_recommendation_path <- file.path(output_dir, "cm_acp_specifications_recommendation.json") + cm_analytic_settings_recommendation_path <- file.path(output_dir, "cm_analytic_settings_recommendation.json") + cm_concept_set_selections_path <- file.path(output_dir, "cm_concept_set_selections.json") + recs_target_path <- file.path(output_dir, "recommendations_target.json") + recs_comparator_path <- file.path(output_dir, "recommendations_comparator.json") + recs_outcome_path <- file.path(output_dir, "recommendations_outcome.json") + improvements_target_path <- file.path(output_dir, "improvements_target.json") + improvements_comparator_path <- file.path(output_dir, "improvements_comparator.json") + improvements_outcome_path <- file.path(output_dir, "improvements_outcome.json") + state_path <- file.path(output_dir, "study_agent_state.json") + + cached_inputs <- NULL + cached_manual_intent <- NULL + cached_cm_target_selection <- load_cached_role_selection(cohort_id_map_path, "target", selected_target_dir) + cached_cm_comparator_selection <- load_cached_role_selection(cohort_id_map_path, "comparator", selected_comparator_dir) + cached_cm_outcome_selection <- load_cached_role_selection(cohort_id_map_path, "outcome", selected_outcome_dir) + cached_incidence_target_selection <- load_cached_role_selection(incidence_cohort_id_map_path, "target", incidence_selected_target_dir) + cached_incidence_outcome_selection <- load_cached_role_selection(incidence_cohort_id_map_path, "outcome", incidence_selected_outcome_dir) + + if (interactive) { + banner_path <- resolve_path(bannerPath, study_base_dir) + banner_path <- normalizePath(banner_path, winslash = "/", mustWork = FALSE) + if (!file.exists(banner_path) && !is_absolute_path(bannerPath) && !nzchar(studyAgentBaseDir)) { + alt <- file.path(getwd(), "OHDSI-Study-Agent", bannerPath) + if (file.exists(alt)) banner_path <- normalizePath(alt, winslash = "/", mustWork = FALSE) + } + if (file.exists(banner_path)) { + 
cat(paste(readLines(banner_path, warn = FALSE), collapse = "\n"), "\n") + } + cat("\nStudy Agent: Strategus CohortMethod shell\n") + } + + default_intent <- studyIntent %||% cached_inputs$study_intent %||% + "Compare a target exposure versus a comparator exposure on one or more outcomes using a cohort method design." + if (isTRUE(interactive)) { + entered <- readline(sprintf("Study intent [%s]: ", default_intent)) + if (nzchar(trimws(entered))) { + studyIntent <- entered + } else { + studyIntent <- default_intent + } + } else if (is.null(studyIntent) || !nzchar(trimws(studyIntent))) { + studyIntent <- default_intent + }
+
+  # TRUE when the first element of `value` is a non-blank, non-missing string.
+  # NA has to be rejected explicitly: nzchar(NA) is TRUE, so without the
+  # is.na() check a missing value would count as a usable statement.
+  nonempty_string <- function(value) {
+    if (is.null(value) || length(value) == 0) return(FALSE)
+    first <- trimws(as.character(value[[1]]))
+    length(first) == 1L && !is.na(first) && nzchar(first)
+  }
+  # Return the trimmed first element of the first argument that passes
+  # nonempty_string(), or NULL when none does.
+  first_nonempty <- function(...) {
+    values <- list(...)
+    for (value in values) {
+      if (nonempty_string(value)) return(trimws(as.character(value[[1]])))
+    }
+    NULL
+  }
+  # Flatten NULL / vector / list / data-frame input into a character vector of
+  # unique, trimmed, non-empty, non-NA statements.
+  normalize_statement_list <- function(value) {
+    if (is.null(value)) return(character(0))
+    if (is.data.frame(value)) value <- unlist(value, recursive = TRUE, use.names = FALSE)
+    if (is.list(value) && !is.character(value)) value <- unlist(value, recursive = TRUE, use.names = FALSE)
+    values <- trimws(as.character(value))
+    values <- values[!is.na(values) & nzchar(values)]
+    unique(values)
+  }
+  # Comparison key for a statement: lower-cased, whitespace runs collapsed to
+  # one space, trailing punctuation removed.
+  statement_dedupe_key <- function(value) {
+    key <- tolower(trimws(as.character(value)))
+    key <- gsub("[[:space:]]+", " ", key)
+    key <- gsub("[[:punct:]]+$", "", key)
+    key
+  }
+  # Normalize statements and keep the first of each group that shares a
+  # dedupe key, preserving the original order.
+  dedupe_statement_list <- function(statements) {
+    statements <- normalize_statement_list(statements)
+    if (length(statements) == 0) return(character(0))
+    keys <- statement_dedupe_key(statements)
+    statements[!duplicated(keys)]
+  }
+  # Concatenate `primary` and every further argument (each normalized) and
+  # de-duplicate the result.
+  combine_statement_list <- function(primary = NULL, ...) {
+    statements <- normalize_statement_list(primary)
+    for (value in list(...)) {
+      statements <- c(statements, normalize_statement_list(value))
+    }
+    dedupe_statement_list(statements)
+  }
+  # Let the user confirm, select, edit and extend the outcome statements.
+  #
+  # defaults: suggested outcome statements (any shape accepted by
+  #           normalize_statement_list()).
+  #
+  # Returns the de-duplicated defaults unchanged when not interactive,
+  # otherwise the de-duplicated statements the user settled on.
+  prompt_outcome_statements <- function(defaults) {
+    defaults <- dedupe_statement_list(defaults)
+    if (!isTRUE(interactive)) return(defaults)
+    if (length(defaults) > 1) {
+      # Several suggestions: let the user pick a subset by number first.
+      cat("\nSuggested outcome statements:\n")
+      for (i in seq_along(defaults)) {
+        cat(sprintf(" %s. %s\n", i, defaults[[i]]))
+      }
+      default_selection <- paste(seq_along(defaults), collapse = ",")
+      use_manual_outcome <- FALSE
+      repeat {
+        entered <- trimws(readline(sprintf(
+          "Keep outcome statements [%s] (comma-separated numbers, 0/none to enter manually, Enter keeps all): ",
+          default_selection
+        )))
+        if (!nzchar(entered)) {
+          selected <- seq_along(defaults)
+        } else if (tolower(entered) %in% c("a", "all")) {
+          selected <- seq_along(defaults)
+        } else if (tolower(entered) %in% c("0", "n", "none")) {
+          # Drop every suggestion; the user types the outcome below.
+          selected <- integer(0)
+          use_manual_outcome <- TRUE
+        } else {
+          selected <- suppressWarnings(parse_ids(entered))
+          selected <- unique(selected[!is.na(selected)])
+        }
+        invalid <- setdiff(selected, seq_along(defaults))
+        if (!isTRUE(use_manual_outcome) && (length(selected) == 0 || length(invalid) > 0)) {
+          cat(sprintf("Please enter one or more valid outcome numbers, such as 1 or 1,3, or 0/none to enter manually. Valid choices: %s\n", default_selection))
+          next
+        }
+        defaults <- defaults[selected]
+        break
+      }
+      cat("Press Enter to keep each selected statement, or type an edited statement.\n")
+    }
+    if (length(defaults) == 0) {
+      # Nothing suggested or kept: ask for a single outcome statement.
+      entered <- prompt_statement("Outcome", default = "")
+      return(dedupe_statement_list(entered))
+    }
+    resolved <- vapply(seq_along(defaults), function(i) {
+      label <- if (length(defaults) == 1) "Outcome" else sprintf("Outcome %s", i)
+      prompt_statement(label, default = defaults[[i]])
+    }, character(1))
+    repeat {
+      add_another <- prompt_yesno("Add another outcome statement?", default = FALSE)
+      if (!isTRUE(add_another)) break
+      next_label <- sprintf("Outcome %s", length(resolved) + 1L)
+      resolved <- c(resolved, prompt_statement(next_label, default = ""))
+    }
+    dedupe_statement_list(resolved)
+  }
+  # Turn an intent-split failure into a short, printable reason.
+  #
+  # split_core: list carrying `error` and/or `message`.
+  #
+  # Returns "unknown error" when neither field has text. If the text (after
+  # an optional "ACP error:" prefix) is a JSON object, its error/status and
+  # diagnostics$llm_status / diagnostics$llm_error are joined with " / ".
+  # Otherwise the raw error text is returned.
+  summarize_intent_split_error <- function(split_core) {
+    error_text <- as.character(split_core$error %||% split_core$message %||% "")
+    # Collapse to one string so the scalar tests below also hold when the
+    # field is empty or carries several strings.
+    error_text <- paste(error_text[!is.na(error_text)], collapse = " ")
+    if (!nzchar(trimws(error_text))) return("unknown error")
+    nested_text <- trimws(sub("^ACP error:\\s*", "", error_text))
+    if (grepl("^\\{", nested_text)) {
+      nested <- tryCatch(jsonlite::fromJSON(nested_text, simplifyVector = FALSE), error = function(e) NULL)
+      if (is.list(nested)) {
+        diagnostics <- nested$diagnostics
+        if (!is.list(diagnostics)) diagnostics <- list()
+        # c() already drops NULL entries, so no separate NULL filter is needed.
+        parts <- c(
+          nested$error %||% nested$status,
+          diagnostics$llm_status,
+          diagnostics$llm_error
+        )
+        parts <- as.character(unlist(parts, use.names = FALSE))
+        parts <- parts[!is.na(parts) & nzchar(trimws(parts))]
+        if (length(parts) > 0) return(paste(unique(parts), collapse = " / "))
+      }
+    }
+    error_text
+  }
+
+ target_statement_default <- first_nonempty( + targetStatement, + cached_manual_intent$target_statement, + cached_inputs$target_statement + ) + comparator_statement_default <- first_nonempty( + comparatorStatement, + cached_manual_intent$comparator_statement, + cached_inputs$comparator_statement + ) + outcome_statement_default <- first_nonempty( + outcomeStatement, +
cached_manual_intent$outcome_statement, + cached_inputs$outcome_statement, + normalize_statement_list(cached_manual_intent$outcome_statements), + normalize_statement_list(cached_inputs$outcome_statements) + ) + outcome_statements_default <- combine_statement_list( + outcome_statement_default, + cached_manual_intent$outcome_statements, + cached_inputs$outcome_statements + ) + + explicit_target_ids_from_args <- normalize_selected_ids( + targetCohortId, + "Target cohort ID", + allow_multiple = FALSE + ) + explicit_comparator_ids_from_args <- normalize_selected_ids( + comparatorCohortId, + "Comparator cohort ID", + allow_multiple = FALSE + ) + explicit_outcome_ids_from_args <- normalize_selected_ids( + outcomeCohortIds, + "Outcome cohort IDs", + allow_multiple = TRUE + ) + all_cohort_ids_from_function_args <- length(explicit_target_ids_from_args) == 1 && + length(explicit_comparator_ids_from_args) == 1 && + length(explicit_outcome_ids_from_args) > 0 + skip_intent_split_and_recommendation <- FALSE + explicit_outcome_statements_from_args <- character(0) + if (isTRUE(all_cohort_ids_from_function_args) && isTRUE(interactive)) { + cat("\nAll target, comparator, and outcome cohort IDs were provided as function arguments:\n") + cat(sprintf(" Target: %s\n", format_cohort_selection_summary(explicit_target_ids_from_args, catalog_df))) + cat(sprintf(" Comparator: %s\n", format_cohort_selection_summary(explicit_comparator_ids_from_args, catalog_df))) + cat(sprintf(" Outcome: %s\n", format_cohort_selection_summary(explicit_outcome_ids_from_args, catalog_df))) + skip_intent_split_and_recommendation <- prompt_yesno( + "Skip study intent split, phenotype recommendation, and phenotype improvements, and use these cohort IDs directly?", + default = TRUE + ) + } + + cohort_id_statement <- function(role_label, ids) { + ids <- as.integer(ids[!is.na(ids)]) + if (length(ids) == 0) return(NULL) + labels <- vapply(ids, function(id) { + lookup_catalog_value(id, catalog_df, "name", 
sprintf("Cohort %s", id)) + }, character(1)) + if (length(ids) == 1) { + return(sprintf("%s cohort: %s (ID %s)", role_label, labels[[1]], ids[[1]])) + } + items <- sprintf("%s (ID %s)", labels, ids) + sprintf("%s cohorts: %s", role_label, paste(items, collapse = "; ")) + } + + if (isTRUE(skip_intent_split_and_recommendation)) { + target_statement_default <- first_nonempty( + target_statement_default, + cohort_id_statement("Target", explicit_target_ids_from_args) + ) + comparator_statement_default <- first_nonempty( + comparator_statement_default, + cohort_id_statement("Comparator", explicit_comparator_ids_from_args) + ) + explicit_outcome_statements_from_args <- vapply(explicit_outcome_ids_from_args, function(id) { + cohort_id_statement("Outcome", as.integer(id)) + }, character(1)) + outcome_statement_default <- first_nonempty( + outcome_statement_default, + explicit_outcome_statements_from_args + ) + outcome_statements_default <- combine_statement_list( + outcome_statement_default, + outcome_statements_default, + explicit_outcome_statements_from_args + ) + } + + cohort_methods_intent_split_source <- "not_run" + cohort_methods_intent_split_status <- "not_run" + cohort_methods_intent_split_response <- NULL + have_all_statement_defaults <- nonempty_string(target_statement_default) && + nonempty_string(comparator_statement_default) && + nonempty_string(outcome_statement_default) + + if (isTRUE(skip_intent_split_and_recommendation)) { + cohort_methods_intent_split_source <- "skipped_explicit_cohort_ids" + cohort_methods_intent_split_status <- "skipped" + if (isTRUE(interactive)) { + cat("\nSkipping study intent split, phenotype recommendation, and phenotype improvements for explicit cohort IDs.\n") + } + } else if (!isTRUE(have_all_statement_defaults)) { + if (isTRUE(interactive)) { + cat("\n== Step 1: Parse study intent into target/comparator/outcome statements ==\n") + } + if (maybe_use_cache(cohort_methods_intent_split_path, "cohort-methods intent split")) { + 
cohort_methods_intent_split_response <- read_json(cohort_methods_intent_split_path) + cohort_methods_intent_split_source <- "cached" + } else if (ensure_acp_ready(acpUrl)) { + if (isTRUE(interactive)) { + cat("Calling ACP flow: cohort_methods_intent_split\n") + } else { + message("Calling ACP flow: cohort_methods_intent_split") + } + cohort_methods_intent_split_response <- tryCatch( + .acp_post("/flows/cohort_methods_intent_split", list(study_intent = studyIntent)), + error = function(e) { + list(status = "error", error = conditionMessage(e)) + } + ) + write_json(cohort_methods_intent_split_response, cohort_methods_intent_split_path) + cohort_methods_intent_split_source <- "acp_flow" + } + + split_core <- cohort_methods_intent_split_response$intent_split %||% cohort_methods_intent_split_response + if (!is.null(split_core) && is.null(split_core$error)) { + cohort_methods_intent_split_status <- as.character(split_core$status %||% "ok") + target_statement_default <- first_nonempty(target_statement_default, split_core$target_statement) + comparator_statement_default <- first_nonempty(comparator_statement_default, split_core$comparator_statement) + outcome_statement_default <- first_nonempty( + outcome_statement_default, + split_core$outcome_statement, + normalize_statement_list(split_core$outcome_statements) + ) + outcome_statements_default <- combine_statement_list( + outcome_statement_default, + outcome_statements_default, + split_core$outcome_statement, + split_core$outcome_statements + ) + if (isTRUE(interactive)) { + rationale <- as.character(split_core$rationale %||% "") + if (nzchar(rationale)) { + cat("\nSuggested rationale:\n") + cat(rationale, "\n") + } + if (length(split_core$questions %||% list()) > 0) { + cat("Questions to clarify:\n") + for (q in split_core$questions) cat(sprintf(" - %s\n", q)) + } + } + } else if (!is.null(split_core$error)) { + cohort_methods_intent_split_status <- "error" + if (isTRUE(interactive)) { + cat("\nACP 
cohort_methods_intent_split failed:\n") + cat(sprintf(" %s\n", summarize_intent_split_error(split_core))) + cat("Proceeding with manual target/comparator/outcome statement entry.\n") + } + } + } else { + cohort_methods_intent_split_source <- "manual_or_cached_statements" + cohort_methods_intent_split_status <- "not_needed" + } + + if (!isTRUE(interactive) && + identical(cohort_methods_intent_split_status, "needs_clarification") && + (!nonempty_string(target_statement_default) || + !nonempty_string(comparator_statement_default) || + !nonempty_string(outcome_statement_default))) { + stop("Cohort methods intent split needs clarification. Provide targetStatement, comparatorStatement, and outcomeStatement for non-interactive execution.") + } + + outcome_statements_default <- combine_statement_list(outcome_statement_default, outcome_statements_default) + + if (isTRUE(skip_intent_split_and_recommendation)) { + targetStatement <- target_statement_default + comparatorStatement <- comparator_statement_default + outcomeStatements <- outcome_statements_default + outcomeStatement <- first_nonempty(outcomeStatements) + } else { + targetStatement <- prompt_statement("Target", default = target_statement_default) + comparatorStatement <- prompt_statement("Comparator", default = comparator_statement_default) + outcomeStatements <- prompt_outcome_statements(outcome_statements_default) + outcomeStatement <- first_nonempty(outcomeStatements) + } + + if (!nonempty_string(targetStatement) || !nonempty_string(comparatorStatement) || !nonempty_string(outcomeStatement)) { + stop( + "Missing target, comparator, or outcome cohort statement. ", + "Provide explicit targetStatement/comparatorStatement/outcomeStatement, ", + "reuse a valid cache, or run ACP with /flows/cohort_methods_intent_split available." + ) + } + + validate_target_id <- function(target_id) { + if (!cohort_json_exists(target_id, index_def_dir)) { + return(sprintf("Target cohort ID %s was not found in %s. 
Please enter a valid target cohort ID.", target_id, index_def_dir)) + } + NULL + } + validate_comparator_id <- function(comparator_id, target_id) { + if (target_id == comparator_id) { + return("Target and comparator cohort IDs must be different.") + } + if (!cohort_json_exists(comparator_id, index_def_dir)) { + return(sprintf("Comparator cohort ID %s was not found in %s. Please enter a valid comparator cohort ID.", comparator_id, index_def_dir)) + } + NULL + } + validate_outcome_ids <- function(outcome_ids, target_id, comparator_id) { + if (any(outcome_ids %in% c(target_id, comparator_id))) { + return("Outcome cohort IDs must be distinct from the target and comparator cohort IDs.") + } + missing_outcomes <- outcome_ids[!vapply(outcome_ids, cohort_json_exists, logical(1), index_def_dir = index_def_dir)] + if (length(missing_outcomes) > 0) { + return(sprintf( + "Outcome cohort ID(s) %s were not found in %s. Please enter valid outcome cohort IDs.", + paste(missing_outcomes, collapse = ", "), + index_def_dir + )) + } + NULL + } + validate_manual_ids <- function(target_id, comparator_id, outcome_ids) { + target_error <- validate_target_id(target_id) + if (!is.null(target_error)) return(target_error) + comparator_error <- validate_comparator_id(comparator_id, target_id) + if (!is.null(comparator_error)) return(comparator_error) + validate_outcome_ids(outcome_ids, target_id, comparator_id) + } + patched_outputs_cleared <- FALSE + ensure_patched_outputs_cleared <- function() { + if (!isTRUE(patched_outputs_cleared)) { + clear_patched_role_outputs() + patched_outputs_cleared <<- TRUE + } + } + improvements_results <- list() + use_function_argument_ids_for_selection <- !( + isTRUE(all_cohort_ids_from_function_args) && + isTRUE(interactive) && + !isTRUE(skip_intent_split_and_recommendation) + ) + preferred_target_ids <- if (isTRUE(use_function_argument_ids_for_selection)) targetCohortId else NULL + preferred_comparator_ids <- if 
(isTRUE(use_function_argument_ids_for_selection)) comparatorCohortId else NULL + + target_rec <- run_role_recommendation( + role_label = "Target", + statement = targetStatement, + output_path = recs_target_path, + top_k = topK, + max_results = maxResults, + candidate_limit = candidateLimit, + allow_multiple = FALSE, + preferred_selected_ids = preferred_target_ids, + preferred_selection_source = "function_argument", + cached_selected_ids = cached_inputs$target_cohort_id %||% NULL, + selected_cache_label = "target cohort selection", + selected_cache_dir = selected_target_dir, + cohort_method_cache = list( + selection = list( + selected_ids = cached_cm_target_selection$selected_ids %||% NULL, + cache_dir = selected_target_dir + ) + ), + incidence_cache = list( + selection = list( + selected_ids = cached_incidence_target_selection$selected_ids %||% NULL, + cache_dir = incidence_selected_target_dir, + label = "incidence target cohort selection" + ) + ) + ) + + targetCohortId <- if (length(target_rec$selected_ids) > 0) { + as.integer(target_rec$selected_ids[[1]]) + } else { + collect_single_id(targetCohortId %||% cached_inputs$target_cohort_id, "Target") + } + if (!length(target_rec$selected_ids)) target_rec$selection_source <- "manual_input" + target_validation_error <- validate_target_id(targetCohortId) + while (!is.null(target_validation_error) && isTRUE(interactive)) { + cat(sprintf("%s\n", target_validation_error)) + targetCohortId <- collect_single_id(NULL, "Target") + target_rec$selection_source <- "manual_input" + target_rec$selected_ids <- as.integer(targetCohortId) + target_validation_error <- validate_target_id(targetCohortId) + } + if (!is.null(target_validation_error)) { + stop(target_validation_error) + } + selected_target_id <- as.integer(targetCohortId) + + default_cohort_id_base_ids <- suppressWarnings(as.integer(c( + targetCohortId, + comparatorCohortId, + outcomeCohortIds, + cached_inputs$target_cohort_id %||% NULL, + cached_inputs$comparator_cohort_id 
%||% NULL, + cached_inputs$outcome_cohort_ids %||% NULL, + catalog_df$cohortId + ))) + default_cohort_id_base_ids <- default_cohort_id_base_ids[!is.na(default_cohort_id_base_ids)] + default_cohort_id_base <- if (length(default_cohort_id_base_ids) > 0) { + max(default_cohort_id_base_ids, na.rm = TRUE) + 1000L + } else { + 1000L + } + use_mapping <- isTRUE(remapCohortIds) + if (isTRUE(interactive)) { + use_mapping <- prompt_yesno("Map cohort IDs to a new range (avoid collisions)?", default = isTRUE(remapCohortIds)) + } + if (use_mapping) { + cohortIdBase <- cohortIdBase %||% cached_inputs$cohort_id_base %||% default_cohort_id_base + cohortIdBase <- suppressWarnings(as.integer(cohortIdBase)) + if (isTRUE(interactive)) { + entered <- trimws(readline(sprintf("Cohort ID base [%s]: ", cohortIdBase))) + if (nzchar(entered)) cohortIdBase <- suppressWarnings(as.integer(entered)) + } + cohortIdBase <- validate_positive_integer(cohortIdBase, "cohortIdBase") + } else { + cohortIdBase <- NA_integer_ + } + + next_id <- cohortIdBase + map_ids <- function(ids) { + if (!use_mapping) return(as.integer(ids)) + new_ids <- seq.int(next_id, length.out = length(ids)) + next_id <<- max(new_ids) + 1L + as.integer(new_ids) + } + new_target_id <- map_ids(selected_target_id) + copy_cohort_json_multi(selected_target_id, new_target_id, c(selected_target_dir, selected_dir), index_def_dir) + ensure_patched_outputs_cleared() + improvements_results$target <- if (isTRUE(skip_intent_split_and_recommendation)) { + skipped_role_improvements( + role_key = "target", + role_label = "Target", + cohort_ids = new_target_id, + patched_role_dir = patched_target_dir, + improvements_path = improvements_target_path, + reason = "explicit_cohort_ids_skip_confirmed" + ) + } else { + run_role_improvement_gate( + role_key = "target", + role_label = "Target", + cohort_ids = new_target_id, + selected_role_dir = selected_target_dir, + patched_role_dir = patched_target_dir, + improvements_path = improvements_target_path, + 
role_statement = targetStatement + ) + } + + comparator_rec <- run_role_recommendation( + role_label = "Comparator", + statement = comparatorStatement, + output_path = recs_comparator_path, + top_k = topK, + max_results = maxResults, + candidate_limit = candidateLimit, + allow_multiple = FALSE, + preferred_selected_ids = preferred_comparator_ids, + preferred_selection_source = "function_argument", + cached_selected_ids = cached_inputs$comparator_cohort_id %||% NULL, + selected_cache_label = "comparator cohort selection", + selected_cache_dir = selected_comparator_dir, + cohort_method_cache = list( + selection = list( + selected_ids = cached_cm_comparator_selection$selected_ids %||% NULL, + cache_dir = selected_comparator_dir + ) + ), + incidence_cache = list( + selection = list( + selected_ids = NULL, + cache_dir = NULL, + label = NULL + ) + ) + ) + + comparatorCohortId <- if (length(comparator_rec$selected_ids) > 0) { + as.integer(comparator_rec$selected_ids[[1]]) + } else { + collect_single_id(comparatorCohortId %||% cached_inputs$comparator_cohort_id, "Comparator") + } + if (!length(comparator_rec$selected_ids)) comparator_rec$selection_source <- "manual_input" + comparator_validation_error <- validate_comparator_id(comparatorCohortId, targetCohortId) + while (!is.null(comparator_validation_error) && isTRUE(interactive)) { + cat(sprintf("%s\n", comparator_validation_error)) + comparatorCohortId <- collect_single_id(NULL, "Comparator") + comparator_rec$selection_source <- "manual_input" + comparator_rec$selected_ids <- as.integer(comparatorCohortId) + comparator_validation_error <- validate_comparator_id(comparatorCohortId, targetCohortId) + } + if (!is.null(comparator_validation_error)) { + stop(comparator_validation_error) + } + selected_comparator_id <- as.integer(comparatorCohortId) + new_comparator_id <- map_ids(selected_comparator_id) + copy_cohort_json_multi(selected_comparator_id, new_comparator_id, c(selected_comparator_dir, selected_dir), index_def_dir) 
# Comparator improvements: when the caller supplied explicit cohort IDs and
# asked to skip recommendation, record a "skipped" result; otherwise run the
# improvement gate for the copied comparator cohort.
if (isTRUE(skip_intent_split_and_recommendation)) {
  improvements_results$comparator <- skipped_role_improvements(
    role_key = "comparator",
    role_label = "Comparator",
    cohort_ids = new_comparator_id,
    patched_role_dir = patched_comparator_dir,
    improvements_path = improvements_comparator_path,
    reason = "explicit_cohort_ids_skip_confirmed"
  )
} else {
  improvements_results$comparator <- run_role_improvement_gate(
    role_key = "comparator",
    role_label = "Comparator",
    cohort_ids = new_comparator_id,
    selected_role_dir = selected_comparator_dir,
    patched_role_dir = patched_comparator_dir,
    improvements_path = improvements_comparator_path,
    role_statement = comparatorStatement
  )
}

# Outcome cohort IDs that may pre-empt the recommendation step: the function
# argument is used first (when allowed); cached manual input is used only when
# there is at most one outcome statement.
cached_input_outcome_ids <- normalize_selected_ids(
  cached_inputs$outcome_cohort_ids %||% NULL,
  "cached outcome cohort IDs",
  allow_multiple = TRUE
)
preferred_outcome_ids <- normalize_selected_ids(
  if (isTRUE(use_function_argument_ids_for_selection)) outcomeCohortIds else NULL,
  "Outcome cohort IDs",
  allow_multiple = TRUE
)
preferred_outcome_source <- "function_argument"
if (length(preferred_outcome_ids) == 0 && length(outcomeStatements) <= 1) {
  if (length(cached_input_outcome_ids) > 0) {
    preferred_outcome_ids <- cached_input_outcome_ids
    preferred_outcome_source <- "cached_manual_input"
  }
}

# Recommendation output path for the i-th outcome statement; the first outcome
# keeps the shared `recs_outcome_path`, later ones get a numbered file.
outcome_recommendation_path <- function(i) {
  outcome_number <- as.integer(i)
  if (!identical(outcome_number, 1L)) {
    return(file.path(output_dir, sprintf("recommendations_outcome_%s.json", outcome_number)))
  }
  recs_outcome_path
}

# One recommendation per statement only when there are several statements and
# no preferred IDs; otherwise a single multi-select recommendation is run.
run_per_outcome_recommendations <- length(outcomeStatements) > 1 &&
  length(preferred_outcome_ids) == 0
outcome_recs <- if (isTRUE(run_per_outcome_recommendations)) {
  lapply(seq_along(outcomeStatements), function(idx) {
    is_first_outcome <- identical(as.integer(idx), 1L)
    run_role_recommendation(
      role_label = if (is_first_outcome) "Outcome" else sprintf("Outcome %s", idx),
      statement = outcomeStatements[[idx]],
      output_path = outcome_recommendation_path(idx),
      top_k = topK,
      max_results = maxResults,
      candidate_limit = candidateLimit,
      allow_multiple = FALSE,
      preferred_selected_ids = NULL,
      preferred_selection_source = "function_argument",
      cached_selected_ids = NULL,
      selected_cache_label = NULL,
      selected_cache_dir = NULL,
      cohort_method_cache = NULL,
      incidence_cache = NULL
    )
  })
} else {
  list(run_role_recommendation(
    role_label = "Outcome",
    statement = outcomeStatement,
    output_path = recs_outcome_path,
    top_k = topK,
    max_results = maxResults,
    candidate_limit = candidateLimit,
    allow_multiple = TRUE,
    preferred_selected_ids = preferred_outcome_ids,
    preferred_selection_source = preferred_outcome_source,
    cached_selected_ids = cached_inputs$outcome_cohort_ids %||% NULL,
    selected_cache_label = "outcome cohort selections",
    selected_cache_dir = selected_outcome_dir,
    cohort_method_cache = list(
      selection = list(
        selected_ids = cached_cm_outcome_selection$selected_ids %||% NULL,
        cache_dir = selected_outcome_dir
      )
    ),
    incidence_cache = list(
      selection = list(
        selected_ids = cached_incidence_outcome_selection$selected_ids %||% NULL,
        cache_dir = incidence_selected_outcome_dir,
        label = "incidence outcome cohort selection"
      )
    )
  ))
}

# Per-recommendation summary records (one entry per element of `outcome_recs`).
outcome_recommendations <- lapply(seq_along(outcome_recs), function(idx) {
  entry <- outcome_recs[[idx]]
  list(
    outcome_index = as.integer(idx),
    statement = entry$statement %||% outcomeStatements[[min(idx, length(outcomeStatements))]],
    selected_ids = as.list(as.integer(entry$selected_ids)),
    path = json_string_or_null(entry$recommendation_path),
    source = entry$recommendation_source,
    selection_source = entry$selection_source,
    used_cached_recommendation = isTRUE(entry$used_cached_recommendation),
    used_cached_selection = isTRUE(entry$used_cached_selection),
    used_window2 = isTRUE(entry$used_window2),
    used_advice = isTRUE(entry$used_advice)
  )
})

# The first recommendation doubles as the aggregate outcome record.
outcome_rec <- outcome_recs[[1]]
outcome_selected_ids_by_rec <- lapply(outcome_recs, function(entry) {
  as.integer(entry$selected_ids)
})
# ---------------------------------------------------------------------------
# Consolidate the outcome selections of all recommendation runs.
# ---------------------------------------------------------------------------
# For every recommendation, repeat its statement once per selected ID so the
# flattened ID vector and statement vector stay aligned element-by-element.
outcome_selected_statements_by_rec <- lapply(seq_along(outcome_recs), function(i) {
  rec <- outcome_recs[[i]]
  ids <- outcome_selected_ids_by_rec[[i]]
  if (length(ids) == 0) return(character(0))
  rep(
    rec$statement %||% outcomeStatements[[min(i, length(outcomeStatements))]],
    length(ids)
  )
})
outcome_selected_ids_flat <- as.integer(unlist(outcome_selected_ids_by_rec, use.names = FALSE))
outcome_selected_statements_flat <- as.character(unlist(outcome_selected_statements_by_rec, use.names = FALSE))

# Drop NA IDs, then keep only the first occurrence of each ID (with its statement).
valid_outcome_selection <- !is.na(outcome_selected_ids_flat)
outcome_selected_ids_flat <- outcome_selected_ids_flat[valid_outcome_selection]
outcome_selected_statements_flat <- outcome_selected_statements_flat[valid_outcome_selection]
unique_outcome_selection <- !duplicated(outcome_selected_ids_flat)
outcome_rec$selected_ids <- as.integer(outcome_selected_ids_flat[unique_outcome_selection])
outcome_selected_statements <- as.character(outcome_selected_statements_flat[unique_outcome_selection])

# With several recommendation runs, the aggregate record reports a flag as set
# when any individual run set it.
if (length(outcome_recs) > 1) {
  outcome_rec$recommendation_source <- "per_outcome"
  outcome_rec$used_cached_recommendation <- any(vapply(outcome_recs, function(rec) isTRUE(rec$used_cached_recommendation), logical(1)))
  outcome_rec$used_cached_selection <- any(vapply(outcome_recs, function(rec) isTRUE(rec$used_cached_selection), logical(1)))
  outcome_rec$used_window2 <- any(vapply(outcome_recs, function(rec) isTRUE(rec$used_window2), logical(1)))
  outcome_rec$used_advice <- any(vapply(outcome_recs, function(rec) isTRUE(rec$used_advice), logical(1)))
}

# Use the recommended IDs when there are any; otherwise collect them manually,
# seeded with the function argument or the cached input.
outcomeCohortIds <- if (length(outcome_rec$selected_ids) > 0) {
  as.integer(outcome_rec$selected_ids)
} else {
  collect_outcome_ids(outcomeCohortIds %||% cached_inputs$outcome_cohort_ids)
}
if (!length(outcome_rec$selected_ids)) outcome_rec$selection_source <- "manual_input"

# Positional fallback: the n-th cohort gets the n-th outcome statement, and the
# last statement is reused when there are more cohorts than statements.
outcome_statements_by_position <- function(n_cohorts) {
  vapply(
    seq_len(n_cohorts),
    function(i) outcomeStatements[[min(i, length(outcomeStatements))]],
    character(1)
  )
}

# Statement attached to each selected outcome cohort. Preference order:
# statements built from explicit IDs (skip mode), statements aligned with the
# recommendation selections, then the positional fallback.
outcomeStatementsForSelectedCohorts <- if (
  isTRUE(skip_intent_split_and_recommendation) &&
    length(explicit_outcome_statements_from_args) == length(outcomeCohortIds)
) {
  as.character(explicit_outcome_statements_from_args)
} else if (length(outcome_selected_statements) == length(outcomeCohortIds)) {
  as.character(outcome_selected_statements)
} else {
  outcome_statements_by_position(length(outcomeCohortIds))
}

# Re-prompt until the outcome IDs validate (interactive only); a failure in
# non-interactive mode is fatal.
outcome_validation_error <- validate_outcome_ids(outcomeCohortIds, targetCohortId, comparatorCohortId)
while (!is.null(outcome_validation_error) && isTRUE(interactive)) {
  cat(sprintf("%s\n", outcome_validation_error))
  outcomeCohortIds <- collect_outcome_ids(NULL)
  outcome_rec$selection_source <- "manual_input"
  outcome_rec$selected_ids <- as.integer(outcomeCohortIds)
  outcomeStatementsForSelectedCohorts <- outcome_statements_by_position(length(outcomeCohortIds))
  outcome_validation_error <- validate_outcome_ids(outcomeCohortIds, targetCohortId, comparatorCohortId)
}
if (!is.null(outcome_validation_error)) {
  stop(outcome_validation_error)
}
# Final cross-check of the full target/comparator/outcome combination.
validation_error <- validate_manual_ids(targetCohortId, comparatorCohortId, outcomeCohortIds)
if (!is.null(validation_error)) {
  stop(validation_error)
}
selected_outcome_ids <- as.integer(outcomeCohortIds)
new_outcome_ids <- map_ids(selected_outcome_ids)

for (i in seq_along(selected_outcome_ids)) {
  copy_cohort_json_multi(selected_outcome_ids[[i]], new_outcome_ids[[i]], c(selected_outcome_dir, selected_dir), index_def_dir)
}
improvements_results$outcome <- if (isTRUE(skip_intent_split_and_recommendation)) {
  skipped_role_improvements(
    role_key = "outcome",
    role_label = "Outcome",
    cohort_ids = new_outcome_ids,
    patched_role_dir = patched_outcome_dir,
    improvements_path = improvements_outcome_path,
    reason = "explicit_cohort_ids_skip_confirmed"
  )
} else {
  run_role_improvement_gate(
    role_key = "outcome",
    role_label = "Outcome",
    cohort_ids = new_outcome_ids,
    selected_role_dir = selected_outcome_dir,
    patched_role_dir = patched_outcome_dir,
    improvements_path = improvements_outcome_path,
    role_statement = paste(unique(outcomeStatementsForSelectedCohorts), collapse = "\n")
  )
}
do_target_improvements <- isTRUE(improvements_results$target$prompt_choice)
do_comparator_improvements <- isTRUE(improvements_results$comparator$prompt_choice)
do_outcome_improvements <- isTRUE(improvements_results$outcome$prompt_choice)

# ---------------------------------------------------------------------------
# Display names, analysis labels and descriptions.
# ---------------------------------------------------------------------------
target_original_name <- lookup_catalog_value(targetCohortId, catalog_df, "name", sprintf("Target cohort %s", targetCohortId))
comparator_original_name <- lookup_catalog_value(comparatorCohortId, catalog_df, "name", sprintf("Comparator cohort %s", comparatorCohortId))
outcome_original_names <- vapply(
  outcomeCohortIds,
  function(id) lookup_catalog_value(id, catalog_df, "name", sprintf("Outcome cohort %s", id)),
  character(1)
)
target_name <- prompt_analysis_label(
  "Target",
  cached_inputs$target_analysis_label %||% cached_inputs$target_name %||% target_original_name
)
comparator_name <- prompt_analysis_label(
  "Comparator",
  cached_inputs$comparator_analysis_label %||% cached_inputs$comparator_name %||% comparator_original_name
)
# Cached labels do not depend on the loop index, so resolve them once.
cached_outcome_labels <- cached_inputs$outcome_analysis_labels %||% cached_inputs$outcome_names %||% NULL
outcome_names <- vapply(seq_along(outcome_original_names), function(i) {
  default_label <- if (!is.null(cached_outcome_labels) && length(cached_outcome_labels) >= i) {
    cached_outcome_labels[[i]]
  } else {
    outcome_original_names[[i]]
  }
  prompt_analysis_label(sprintf("Outcome %s", i), default_label)
}, character(1))
target_desc <- lookup_catalog_value(targetCohortId, catalog_df, "short_description", "")
comparator_desc <- lookup_catalog_value(comparatorCohortId, catalog_df, "short_description", "")
outcome_descs <- vapply(
  outcomeCohortIds,
  function(id) lookup_catalog_value(id, catalog_df, "short_description", ""),
  character(1)
)

comparisonLabel <- comparisonLabel %||% cached_inputs$comparison_label
if (is.null(comparisonLabel) || !nzchar(trimws(comparisonLabel))) {
  comparisonLabel <- sprintf("%s vs %s", target_name, comparator_name)
}
comparisonLabel <- prompt_analysis_label("Comparison", comparisonLabel)

# ---------------------------------------------------------------------------
# Cached analytic settings, negative-control and covariate concept sets.
# ---------------------------------------------------------------------------
cached_analytic_settings <- cached_inputs$analytic_settings %||% list()
# `cached_analytic_settings` already falls back to list(); both names are kept
# because code outside this section may refer to either.
cached_analytics <- cached_analytic_settings
cached_covariate_settings <- cached_analytics$covariate_concept_sets %||% list()

negative_control_enabled <- isTRUE(cached_inputs$negative_control_enabled) ||
  !is.null(negativeControlConceptSetId %||% cached_inputs$negative_control_concept_set_id)
if (isTRUE(interactive)) {
  negative_control_enabled <- prompt_yesno(
    "Add a negative control concept set selection?",
    default = negative_control_enabled
  )
}
if (isTRUE(negative_control_enabled)) {
  negativeControlConceptSetId <- collect_optional_single_id(
    negativeControlConceptSetId %||% cached_inputs$negative_control_concept_set_id,
    "Negative control concept set",
    "Negative control concept set ID: "
  )
  if (is.null(negativeControlConceptSetId)) {
    stop("Negative control concept set ID is required when negative control concept set selection is enabled.")
  }
} else {
  negativeControlConceptSetId <- NULL
}

cached_include_covariate_id <- cached_covariate_settings$include_concept_set_id %||%
  cached_inputs$covariate_include_concept_set_id
cached_exclude_covariate_id <- cached_covariate_settings$exclude_concept_set_id %||%
  cached_inputs$covariate_exclude_concept_set_id
cached_include_all_covariates <- cached_covariate_settings$include_all_concepts %||%
  cached_inputs$covariate_include_all_concepts
# Covariate selection defaults to "on" when the cache says so or when any
# include/exclude ID is available from the arguments or the cache.
covariate_enabled <- isTRUE(cached_covariate_settings$enabled %||% cached_inputs$covariate_concept_sets_enabled) ||
  !is.null(includeCovariateConceptSetId %||% excludeCovariateConceptSetId %||%
    cached_include_covariate_id %||% cached_exclude_covariate_id)
if (isTRUE(interactive)) {
  covariate_enabled <- prompt_yesno(
    "Add covariate concept set selections?",
    default = covariate_enabled
  )
}
include_all_covariates <- isTRUE(cached_include_all_covariates) || !isTRUE(covariate_enabled)
if (isTRUE(covariate_enabled)) {
  includeCovariateConceptSetId <- collect_optional_single_id(
    includeCovariateConceptSetId %||% cached_include_covariate_id,
    "Covariate include concept set",
    "Covariate include concept set ID [optional; leave blank if you want to include all concepts or only set an exclude concept set]: "
  )
  excludeCovariateConceptSetId <- collect_optional_single_id(
    excludeCovariateConceptSetId %||% cached_exclude_covariate_id,
    "Covariate exclude concept set",
    "Covariate exclude concept set ID [optional]: "
  )
  include_all_covariates <- is.null(includeCovariateConceptSetId)
} else {
  includeCovariateConceptSetId <- NULL
  excludeCovariateConceptSetId <- NULL
}

default_analytic_settings <- .studyAgentDefaultCohortMethodAnalyticSettings(
  covariate_enabled = covariate_enabled
)

cached_get_db <- cached_analytics$get_db_cohort_method_data %||% list()
cached_study_pop <- cached_analytics$create_study_population %||% list()
cached_ps <- cached_analytics$create_ps %||% list()
cached_ps_adjustment <- cached_analytics$ps_adjustment %||% list()
cached_match <- cached_analytics$match_on_ps %||% list()
cached_stratify <- cached_analytics$stratify_by_ps %||% list()
cached_outcome_model <- cached_analytics$fit_outcome_model %||% list()
cached_covariates <- cached_analytics$covariate_concept_sets %||% list()

# Return `cache_value` unless it carries no value, in which case return
# `default_value`. Any zero-length value (NULL, numeric(0), character(0),
# list()) counts as "nothing cached", so an empty cached entry can never reach
# the as.integer()/as.numeric() coercions below as a zero-length vector.
merge_or_default <- function(default_value, cache_value) {
  if (length(cache_value) == 0) {
    default_value
  } else {
    cache_value
  }
}

# Effective settings = defaults overlaid with cached values, section by section.
effective_analytic_settings <- list(
  profile_name = merge_or_default(default_analytic_settings$profile_name, cached_analytics$profile_name),
  source = "manual_shell",
  customized_sections = character(0),
  get_db_cohort_method_data = list(
    studyStartDate = merge_or_default(
      default_analytic_settings$get_db_cohort_method_data$studyStartDate,
      cached_get_db$studyStartDate
    ),
    studyEndDate = merge_or_default(
      default_analytic_settings$get_db_cohort_method_data$studyEndDate,
      cached_get_db$studyEndDate
    ),
    firstExposureOnly = merge_or_default(
      default_analytic_settings$get_db_cohort_method_data$firstExposureOnly,
      cached_get_db$firstExposureOnly
    ),
    washoutPeriod = as.integer(merge_or_default(
      default_analytic_settings$get_db_cohort_method_data$washoutPeriod,
      cached_get_db$washoutPeriod
    )),
    restrictToCommonPeriod = isTRUE(cached_get_db$restrictToCommonPeriod %||% default_analytic_settings$get_db_cohort_method_data$restrictToCommonPeriod),
    removeDuplicateSubjects = merge_or_default(
      default_analytic_settings$get_db_cohort_method_data$removeDuplicateSubjects,
      cached_get_db$removeDuplicateSubjects
    )
  ),
  create_study_population = list(
    maxCohortSize = as.integer(merge_or_default(
      default_analytic_settings$create_study_population$maxCohortSize,
      cached_study_pop$maxCohortSize
    )),
    removeDuplicateSubjects = merge_or_default(
      default_analytic_settings$create_study_population$removeDuplicateSubjects,
      cached_study_pop$removeDuplicateSubjects
    ),
    removeSubjectsWithPriorOutcome = isTRUE(cached_study_pop$removeSubjectsWithPriorOutcome %||%
      default_analytic_settings$create_study_population$removeSubjectsWithPriorOutcome),
    priorOutcomeLookback = as.integer(merge_or_default(
      default_analytic_settings$create_study_population$priorOutcomeLookback,
      cached_study_pop$priorOutcomeLookback
    )),
    minDaysAtRisk = as.integer(merge_or_default(
      default_analytic_settings$create_study_population$minDaysAtRisk,
      cached_study_pop$minDaysAtRisk
    )),
    riskWindowStart = as.integer(merge_or_default(
      default_analytic_settings$create_study_population$riskWindowStart,
      cached_study_pop$riskWindowStart
    )),
    startAnchor = merge_or_default(
      default_analytic_settings$create_study_population$startAnchor,
      cached_study_pop$startAnchor
    ),
    riskWindowEnd = as.integer(merge_or_default(
      default_analytic_settings$create_study_population$riskWindowEnd,
      cached_study_pop$riskWindowEnd
    )),
    endAnchor = merge_or_default(
      default_analytic_settings$create_study_population$endAnchor,
      cached_study_pop$endAnchor
    ),
    censorAtNewRiskWindow = isTRUE(cached_study_pop$censorAtNewRiskWindow %||%
      default_analytic_settings$create_study_population$censorAtNewRiskWindow)
  ),
  create_ps = list(
    estimator = merge_or_default(
      default_analytic_settings$create_ps$estimator,
      cached_ps$estimator
    ),
    maxCohortSizeForFitting = as.integer(merge_or_default(
      default_analytic_settings$create_ps$maxCohortSizeForFitting,
      cached_ps$maxCohortSizeForFitting
    )),
    errorOnHighCorrelation = isTRUE(cached_ps$errorOnHighCorrelation %||% default_analytic_settings$create_ps$errorOnHighCorrelation),
    useRegularization = isTRUE(cached_ps$useRegularization %||% default_analytic_settings$create_ps$useRegularization)
  ),
  ps_adjustment = list(
    strategy = merge_or_default(
      default_analytic_settings$ps_adjustment$strategy,
      cached_ps_adjustment$strategy
    ),
    trimmingStrategy = merge_or_default(
      default_analytic_settings$ps_adjustment$trimmingStrategy,
      cached_ps_adjustment$trimmingStrategy
    ),
    trimmingPercent = as.numeric(merge_or_default(
      default_analytic_settings$ps_adjustment$trimmingPercent,
      cached_ps_adjustment$trimmingPercent
    )),
    equipoiseLowerBound = as.numeric(merge_or_default(
      default_analytic_settings$ps_adjustment$equipoiseLowerBound,
      cached_ps_adjustment$equipoiseLowerBound
    )),
    equipoiseUpperBound = as.numeric(merge_or_default(
      default_analytic_settings$ps_adjustment$equipoiseUpperBound,
      cached_ps_adjustment$equipoiseUpperBound
    ))
  ),
  match_on_ps = list(
    caliper = as.numeric(merge_or_default(
      default_analytic_settings$match_on_ps$caliper,
      cached_match$caliper
    )),
    caliperScale = merge_or_default(
      default_analytic_settings$match_on_ps$caliperScale,
      cached_match$caliperScale
    ),
    maxRatio = as.integer(merge_or_default(
      default_analytic_settings$match_on_ps$maxRatio,
      cached_match$maxRatio
    ))
  ),
  stratify_by_ps = list(
    numberOfStrata = as.integer(merge_or_default(
      default_analytic_settings$stratify_by_ps$numberOfStrata,
      cached_stratify$numberOfStrata
    )),
    baseSelection = merge_or_default(
      default_analytic_settings$stratify_by_ps$baseSelection,
      cached_stratify$baseSelection
    )
  ),
  fit_outcome_model = list(
    modelType = merge_or_default(
      default_analytic_settings$fit_outcome_model$modelType,
      cached_outcome_model$modelType
    ),
    stratified = isTRUE(cached_outcome_model$stratified %||% default_analytic_settings$fit_outcome_model$stratified),
    useCovariates = isTRUE(cached_outcome_model$useCovariates %||% default_analytic_settings$fit_outcome_model$useCovariates),
    inversePtWeighting = isTRUE(cached_outcome_model$inversePtWeighting %||% default_analytic_settings$fit_outcome_model$inversePtWeighting),
    useRegularization = isTRUE(cached_outcome_model$useRegularization %||% default_analytic_settings$fit_outcome_model$useRegularization)
  ),
  # The covariate values resolved above (function arguments, prompt answers,
  # cache-seeded defaults) are authoritative. Reading the cache again here
  # would let a stale cached flag or ID override what the user just chose,
  # e.g. keeping `enabled = TRUE` after the user answered "no", or restoring a
  # cached include ID after it was blanked.
  covariate_concept_sets = list(
    enabled = isTRUE(covariate_enabled),
    include_all_concepts = isTRUE(include_all_covariates),
    include_concept_set_id = json_int_or_null(includeCovariateConceptSetId),
    exclude_concept_set_id = json_int_or_null(excludeCovariateConceptSetId)
  )
)

# ---------------------------------------------------------------------------
# Analytic-settings configuration mode (step-by-step vs free-text).
# ---------------------------------------------------------------------------
has_function_argument_description <- !is.null(analyticSettingsDescription) || !is.null(analytic_settings_description_path_resolved)
cached_mode <- as.character(cached_inputs$analytic_settings_mode %||% if (has_function_argument_description) "free_text" else "step_by_step")
analytic_settings_mode <- if (isTRUE(interactive)) {
  # Interactive: let the user pick, defaulting to free-text when a description
  # was passed in or the cached mode was free-text.
  mode_default <- if (has_function_argument_description || identical(cached_mode, "free_text")) "free-text" else "step-by-step"
  cat("\nHow would you like to configure analytic settings?\n")
  cat(" 1. Step-by-step\n")
  cat(" Walk through the required analytic settings sections in order.\n")
  cat(" In the current stage, the shell walks the section flow and shows the OHDSI defaults for the remaining sub-settings.\n")
  cat(" 2. Free-text\n")
  cat(" Describe the analytic settings you want in natural language.\n")
  cat(" The shell will create a dummy recommendation JSON, show the proposed key/value pairs, and ask you to confirm.\n")
  mode_choice <- collect_choice_value(
    value = mode_default,
    label = "Analytic settings configuration mode",
    choices = c("step-by-step", "free-text"),
    prompt = "Choose analytic settings mode by number.",
    default = mode_default
  )
  if (identical(mode_choice, "free-text")) "free_text" else "step_by_step"
} else if (has_function_argument_description ||
  (identical(cached_mode, "free_text") &&
    (nzchar(trimws(as.character(cached_inputs$analytic_settings_description %||% ""))) ||
      nzchar(trimws(as.character(cached_inputs$analytic_settings_description_path %||% "")))))) {
  # Non-interactive free-text needs an actual description: either passed as an
  # argument, or cached (text or path) together with a cached free-text mode.
  "free_text"
} else {
  "step_by_step"
}
analytic_settings_selection_source <- if (isTRUE(interactive)) "manual_prompt" else if (!is.null(cached_inputs$analytic_settings_mode)) "cached" else "default_non_interactive"
analytic_settings_input_method <- if (identical(analytic_settings_mode, "free_text")) {
  as.character(cached_inputs$analytic_settings_input_method %||% "typed_text")
} else {
  "step_by_step"
}
analytic_settings_description <- cached_inputs$analytic_settings_description %||% NULL
+  # ---- Analytic settings collection ------------------------------------------
+  # Seed the analytic-settings bookkeeping from cached inputs. When nothing is
+  # cached, free-text mode starts as "pending" and any other mode as
+  # "not_applicable".
+  analytic_settings_description_path <- cached_inputs$analytic_settings_description_path %||% NULL
+  analytic_settings_recommendation_source <- as.character(cached_inputs$analytic_settings_recommendation_source %||% if (identical(analytic_settings_mode, "free_text")) "pending" else "not_applicable")
+  analytic_settings_acp_response_path <- json_string_or_null(cached_inputs$analytic_settings_acp_response_path)
+  analytic_settings_recommendation_path <- json_string_or_null(cached_inputs$analytic_settings_recommendation_path)
+  analytic_settings_recommendation_status <- as.character(cached_inputs$analytic_settings_recommendation_status %||% if (identical(analytic_settings_mode, "free_text")) "pending" else "not_applicable")
+  analytic_settings_confirmed <- isTRUE(cached_inputs$analytic_settings_confirmed %||% FALSE)
+  analytic_settings_section_flow <- c("study_population", "time_at_risk", "propensity_score_adjustment", "outcome_model")
+
+  # All concepts are included when covariate concept sets are disabled or when
+  # the caller explicitly asked for all covariates.
+  effective_analytic_settings$covariate_concept_sets$include_all_concepts <- isTRUE(!isTRUE(covariate_enabled)) ||
+    isTRUE(include_all_covariates)
+
+  if (identical(analytic_settings_mode, "step_by_step")) {
+    if (isTRUE(interactive)) {
+      cat("\nAnalytic settings mode: step-by-step\n")
+      cat("The shell will collect each required section in order and ask for the analytic settings profile name last.\n")
+    }
+
+    # Prompt callbacks handed to the step-by-step collector via its `io` argument.
+    step_by_step_io <- list(
+      # Print a bracketed section title.
+      section_header = function(label) {
+        cat(sprintf("\n[%s]\n", label))
+      },
+      # Read one line of text; blank input yields `default`.
+      # NOTE(review): `allow_blank` is accepted but has never changed the
+      # result (both original branches returned `default`). Confirm whether
+      # blank input should return "" when `allow_blank = TRUE`.
+      text = function(prompt, default = "", allow_blank = FALSE) {
+        entered <- trimws(readline(sprintf("%s [%s]: ", prompt, default)))
+        if (!nzchar(entered)) return(default)
+        entered
+      },
+      # Yes/no question delegated to prompt_yesno_strict().
+      yesno = function(prompt, default = TRUE) {
+        prompt_yesno_strict(prompt, default = default)
+      },
+      # Present `labels` to the user and map the chosen label back to the
+      # matching entry of `choices`. An unknown default falls back to the first
+      # choice.
+      choice = function(prompt, choices, default, labels = choices) {
+        default_index <- match(default, choices)
+        if (is.na(default_index)) default_index <- 1L
+        selected_label <- collect_choice_value(
+          value = labels[[default_index]],
+          label = prompt,
+          choices = labels,
+          prompt = prompt,
+          default = labels[[default_index]]
+        )
+        choices[[match(selected_label, labels)]]
+      },
+      # Re-prompt until an integer >= `min_value` (when given) is entered.
+      integer = function(prompt, default, min_value = NULL, allow_negative = TRUE) {
+        repeat {
+          value <- prompt_integer(
+            prompt = prompt,
+            default = default,
+            allow_null = FALSE,
+            must_be_positive = FALSE,
+            allow_negative = allow_negative
+          )
+          if (!is.null(min_value) && value < min_value) {
+            cat(sprintf("Please enter an integer >= %s.\n", min_value))
+            next
+          }
+          return(value)
+        }
+      },
+      # Re-prompt until a number >= `min_value` (when given) is entered.
+      numeric = function(prompt, default, min_value = NULL) {
+        repeat {
+          value <- prompt_numeric(
+            prompt = prompt,
+            default = default,
+            must_be_positive = FALSE
+          )
+          if (!is.null(min_value) && value < min_value) {
+            cat(sprintf("Please enter a number >= %s.\n", min_value))
+            next
+          }
+          return(value)
+        }
+      }
+    )
+    step_by_step_result <- .studyAgentCollectStepByStepAnalyticSettings(
+      default_settings = default_analytic_settings,
+      seed_settings = effective_analytic_settings,
+      interactive = interactive,
+      io = step_by_step_io
+    )
+    effective_analytic_settings <- step_by_step_result$settings
+    analytic_settings_section_flow <- step_by_step_result$section_flow
+
+    # Step-by-step mode involves no free-text description and no ACP
+    # recommendation, so the related bookkeeping is reset.
+    analytic_settings_description <- NULL
+    analytic_settings_description_path <- NULL
+    analytic_settings_recommendation_source <- "not_applicable"
+    analytic_settings_acp_response_path <- NA_character_
+    analytic_settings_recommendation_path <- NA_character_
+    analytic_settings_recommendation_status <- "not_applicable"
+    analytic_settings_confirmed <- TRUE
+  } else {
+    # Single pass: every path below either stop()s or reaches the trailing
+    # `break`; nothing in the body issues `next`.
+    repeat {
+      # Resolve the description, in priority order: function argument text,
+      # function argument path, cached text, cached path, interactive prompt.
+      if (!is.null(analyticSettingsDescription)) {
+        analytic_settings_input_method <- "function_argument_text"
+        analytic_settings_description <- analyticSettingsDescription
+        analytic_settings_description_path <- NULL
+      } else if (!is.null(analytic_settings_description_path_resolved)) {
+        if (!file.exists(analytic_settings_description_path_resolved)) {
+          stop(sprintf("Analytic settings description file not found: %s", analytic_settings_description_path_resolved))
+        }
+        file_lines <- readLines(analytic_settings_description_path_resolved, warn = FALSE)
+        analytic_settings_description <- trimws(paste(file_lines, collapse = "\n"))
+        if (!nzchar(analytic_settings_description)) {
+          stop(sprintf("Analytic settings description file is empty: %s", analytic_settings_description_path_resolved))
+        }
+        analytic_settings_input_method <- "function_argument_path"
+        analytic_settings_description_path <- analytic_settings_description_path_resolved
+      } else if (!is.null(analytic_settings_description) && nzchar(trimws(as.character(analytic_settings_description)))) {
+        analytic_settings_description <- trimws(as.character(analytic_settings_description))
+        analytic_settings_input_method <- as.character(cached_inputs$analytic_settings_input_method %||% "typed_text")
+      } else if (!is.null(analytic_settings_description_path) && nzchar(trimws(as.character(analytic_settings_description_path)))) {
+        cached_description_path <- normalizePath(resolve_path(as.character(analytic_settings_description_path), study_base_dir), winslash = "/", mustWork = FALSE)
+        if (!file.exists(cached_description_path)) {
+          stop(sprintf("Cached analytic settings description file not found: %s", cached_description_path))
+        }
+        file_lines <- readLines(cached_description_path, warn = FALSE)
+        analytic_settings_description <- trimws(paste(file_lines, collapse = "\n"))
+        if (!nzchar(analytic_settings_description)) {
+          stop(sprintf("Cached analytic settings description file is empty: %s", cached_description_path))
+        }
+        analytic_settings_input_method <- "cached_path"
+        analytic_settings_description_path <- cached_description_path
+      } else if (isTRUE(interactive)) {
+        analytic_settings_description <- prompt_non_null_text(
+          "Study description for analytic settings",
+          default = analytic_settings_description
+        )
+        analytic_settings_input_method <- "typed_text"
+        analytic_settings_description_path <- NULL
+      } else {
+        stop("Free-text analytic settings mode requires `analyticSettingsDescription`, `analyticSettingsDescriptionPath`, or a cached description in non-interactive runs.")
+      }
+
+      acp_request_body <- list(
+        study_intent = studyIntent,
+        study_description = analytic_settings_description,
+        analytic_settings_description = analytic_settings_description
+      )
+      if (isTRUE(interactive)) {
+        cat("Calling ACP flow: cohort_methods_specifications_recommendation\n")
+      } else {
+        message("Calling ACP flow: cohort_methods_specifications_recommendation")
+      }
+
+      ensure_acp_ready(acpUrl)
+      # Best effort: a failing ACP call is converted into a stub response (the
+      # error message is kept in the payload) instead of aborting the shell.
+      shell_suggestion_response <- tryCatch(
+        suggestCohortMethodSpecs(
+          studyIntent = acp_request_body$study_intent,
+          analyticSettingsDescription = acp_request_body$analytic_settings_description,
+          interactive = FALSE
+        ),
+        error = function(e) {
+          list(
+            source = "stub_no_acp",
+            status = "stub",
+            error = conditionMessage(e),
+            message = "ACP flow failed. Returning placeholder cohort methods specifications recommendation.",
+            request = acp_request_body
+          )
+        }
+      )
+      # Take the recommendation from the first response field that carries one;
+      # otherwise build a dummy recommendation from the current settings.
+      recommendation <- shell_suggestion_response$recommendation %||%
+        shell_suggestion_response$response$recommendation %||%
+        shell_suggestion_response$response$recommendations %||%
+        shell_suggestion_response$response$cohort_methods_specifications_recommendation %||%
+        shell_suggestion_response$cohort_methods_specifications_recommendation %||%
+        build_dummy_analytic_settings_recommendation(
+          description = acp_request_body$analytic_settings_description,
+          defaults_snapshot = effective_analytic_settings,
+          input_method = analytic_settings_input_method
+        )
+      if (is.null(recommendation$defaults_snapshot) || length(recommendation$defaults_snapshot) == 0) {
+        recommendation$defaults_snapshot <- effective_analytic_settings
+      }
+      recommendation_source <- as.character(
+        if (identical(shell_suggestion_response$source, "stub_no_acp")) "stub_acp_placeholder" else shell_suggestion_response$source %||% "acp_flow"
+      )
+      acp_specifications_response <- list(
+        flow = "cohort_methods_specifications_recommendation",
+        source = recommendation_source,
+        status = shell_suggestion_response$status %||% "received",
+        request = acp_request_body,
+        response = shell_suggestion_response,
+        recommendation = recommendation
+      )
+
+      # Persist both the full ACP exchange and the bare recommendation.
+      write_json(acp_specifications_response, cm_acp_specifications_recommendation_path)
+      analytic_settings_acp_response_path <- cm_acp_specifications_recommendation_path
+      analytic_settings_recommendation_source <- as.character(acp_specifications_response$source %||% "unknown")
+      analytic_settings_recommendation <- recommendation
+      write_json(analytic_settings_recommendation, cm_analytic_settings_recommendation_path)
+      analytic_settings_recommendation_path <- cm_analytic_settings_recommendation_path
+
+      if (isTRUE(interactive)) {
+        print_analytic_settings_recommendation_preview(
+          acp_response = acp_specifications_response,
+          recommendation = analytic_settings_recommendation,
+          recommendation_path = analytic_settings_recommendation_path,
+          acp_response_path = analytic_settings_acp_response_path
+        )
+        analytic_settings_confirmed <- FALSE
+      } else {
+        analytic_settings_confirmed <- isTRUE(cached_inputs$analytic_settings_confirmed %||% TRUE)
+      }
+
+      # The flow status is computed once and reused for both the settings
+      # decision and the recorded recommendation status below.
+      analytic_settings_flow_status <- as.character(
+        acp_specifications_response$response$status %||% acp_specifications_response$status %||% "unknown"
+      )
+      acp_recommendation_ok <- identical(analytic_settings_recommendation_source, "acp_flow") &&
+        identical(analytic_settings_flow_status, "ok")
+
+      if (acp_recommendation_ok) {
+        recommendation_defaults_snapshot <- analytic_settings_recommendation$defaults_snapshot
+        if (is.null(recommendation_defaults_snapshot) || length(recommendation_defaults_snapshot) == 0) {
+          recommendation_defaults_snapshot <- effective_analytic_settings
+        }
+        effective_analytic_settings <- shell_settings_from_acp_recommendation(
+          analytic_settings_recommendation,
+          recommendation_defaults_snapshot
+        )
+      } else {
+        # Without a successful ACP result only the profile name is adopted.
+        effective_analytic_settings$profile_name <- as.character(
+          analytic_settings_recommendation$profile_name %||% effective_analytic_settings$profile_name
+        )
+      }
+      # NOTE(review): in interactive runs `analytic_settings_confirmed` is
+      # still FALSE here and only becomes TRUE after the review step below, so
+      # the recorded status keeps its "received_*" / "stub_generated" value.
+      analytic_settings_recommendation_status <- if (acp_recommendation_ok) {
+        if (isTRUE(analytic_settings_confirmed)) "confirmed_via_acp" else "received_from_acp"
+      } else if (identical(analytic_settings_recommendation_source, "acp_flow")) {
+        if (isTRUE(analytic_settings_confirmed)) "confirmed_acp_fallback" else "received_acp_fallback"
+      } else {
+        if (isTRUE(analytic_settings_confirmed)) "stub_fallback" else "stub_generated"
+      }
+      break
+    }
+  }
+
+  if (isTRUE(interactive)) {
+    effective_analytic_settings <- review_analytic_settings_interactively(effective_analytic_settings)
+    analytic_settings_confirmed <- TRUE
+  }
+
+  # A section counts as customized when any of its setting paths differs from
+  # the defaults. The path table is fetched once instead of once per lookup
+  # (assumes the helper is a pure lookup).
+  analytic_settings_section_paths <- .studyAgentAnalyticSettingsSectionPaths()
+  effective_analytic_settings$customized_sections <- names(analytic_settings_section_paths)[vapply(
+    names(analytic_settings_section_paths),
+    function(section_name) {
+      paths <- analytic_settings_section_paths[[section_name]]
+      any(vapply(paths, function(path) {
+        !identical(
+          .studyAgentGetNestedValue(effective_analytic_settings, path),
+          .studyAgentGetNestedValue(default_analytic_settings, path)
+        )
+      }, logical(1)))
+    },
+    logical(1)
+  )]
+
+  effective_analytic_settings <- normalize_analytic_settings(effective_analytic_settings)
+  covariate_enabled <- isTRUE(effective_analytic_settings$covariate_concept_sets$enabled)
+  include_all_covariates <- isTRUE(effective_analytic_settings$covariate_concept_sets$include_all_concepts)
+  # Guard against a NULL / zero-length id: `if (is.na(NULL))` is an error.
+  include_concept_set_id_value <- effective_analytic_settings$covariate_concept_sets$include_concept_set_id
+  includeCovariateConceptSetId <- if (length(include_concept_set_id_value) == 0 || is.na(include_concept_set_id_value[[1]])) {
+    NULL
+  } else {
+    as.integer(include_concept_set_id_value)
+  }
+  # ---- Shell artifacts --------------------------------------------------------
+  # Guard against a NULL / zero-length id: `if (is.na(NULL))` is an error.
+  exclude_concept_set_id_value <- effective_analytic_settings$covariate_concept_sets$exclude_concept_set_id
+  excludeCovariateConceptSetId <- if (length(exclude_concept_set_id_value) == 0 || is.na(exclude_concept_set_id_value[[1]])) {
+    NULL
+  } else {
+    as.integer(exclude_concept_set_id_value)
+  }
+
+  # Record how the study intent was split (or why the split was skipped).
+  manual_intent <- list(
+    source = cohort_methods_intent_split_source,
+    intent_split_status = cohort_methods_intent_split_status,
+    intent_split_path = json_string_or_null(if (file.exists(cohort_methods_intent_split_path)) cohort_methods_intent_split_path else NULL),
+    explicit_cohort_ids_supplied = isTRUE(all_cohort_ids_from_function_args),
+    skip_intent_split_and_recommendation = isTRUE(skip_intent_split_and_recommendation),
+    skip_phenotype_improvements = isTRUE(skip_intent_split_and_recommendation),
+    skip_reason = json_string_or_null(if (isTRUE(skip_intent_split_and_recommendation)) "all_cohort_ids_provided" else NULL),
+    skip_prompt_source = if (isTRUE(all_cohort_ids_from_function_args) && isTRUE(interactive)) "interactive_user_choice" else "not_prompted",
+    study_intent = studyIntent,
+    target_statement = targetStatement,
+    comparator_statement = comparatorStatement,
+    outcome_statement = outcomeStatement,
+    outcome_statements = as.list(outcomeStatements)
+  )
+  write_json(manual_intent, manual_intent_path)
+
+  # Full snapshot of the inputs and selections made in this run.
+  manual_inputs <- list(
+    study_intent = studyIntent,
+    target_statement = targetStatement,
+    comparator_statement = comparatorStatement,
+    outcome_statement = outcomeStatement,
+    outcome_statements = as.list(outcomeStatements),
+    cohort_methods_intent_split_path = json_string_or_null(if (file.exists(cohort_methods_intent_split_path)) cohort_methods_intent_split_path else NULL),
+    cohort_methods_intent_split_source = cohort_methods_intent_split_source,
+    cohort_methods_intent_split_status = cohort_methods_intent_split_status,
+    explicit_cohort_ids_supplied = isTRUE(all_cohort_ids_from_function_args),
+    skip_intent_split_and_recommendation = isTRUE(skip_intent_split_and_recommendation),
+    skip_phenotype_improvements = isTRUE(skip_intent_split_and_recommendation),
+    skip_reason = json_string_or_null(if (isTRUE(skip_intent_split_and_recommendation)) "all_cohort_ids_provided" else NULL),
+    skip_prompt_source = if (isTRUE(all_cohort_ids_from_function_args) && isTRUE(interactive)) "interactive_user_choice" else "not_prompted",
+    use_function_argument_ids_for_selection = isTRUE(use_function_argument_ids_for_selection),
+    comparison_label = comparisonLabel,
+    target_cohort_id = as.integer(targetCohortId),
+    comparator_cohort_id = as.integer(comparatorCohortId),
+    outcome_cohort_ids = as.integer(outcomeCohortIds),
+    outcome_cohort_statements = as.list(outcomeStatementsForSelectedCohorts),
+    target_recommendation = list(
+      statement = targetStatement,
+      path = json_string_or_null(target_rec$recommendation_path),
+      source = target_rec$recommendation_source,
+      selection_source = target_rec$selection_source,
+      used_cached_recommendation = isTRUE(target_rec$used_cached_recommendation),
+      used_cached_selection = isTRUE(target_rec$used_cached_selection),
+      used_window2 = isTRUE(target_rec$used_window2),
+      used_advice = isTRUE(target_rec$used_advice)
+    ),
+    comparator_recommendation = list(
+      statement = comparatorStatement,
+      path = json_string_or_null(comparator_rec$recommendation_path),
+      source = comparator_rec$recommendation_source,
+      selection_source = comparator_rec$selection_source,
+      used_cached_recommendation = isTRUE(comparator_rec$used_cached_recommendation),
+      used_cached_selection = isTRUE(comparator_rec$used_cached_selection),
+      used_window2 = isTRUE(comparator_rec$used_window2),
+      used_advice = isTRUE(comparator_rec$used_advice)
+    ),
+    outcome_recommendation = list(
+      statement = outcomeStatement,
+      path = json_string_or_null(outcome_rec$recommendation_path),
+      source = outcome_rec$recommendation_source,
+      selection_source = outcome_rec$selection_source,
+      used_cached_recommendation = isTRUE(outcome_rec$used_cached_recommendation),
+      used_cached_selection = isTRUE(outcome_rec$used_cached_selection),
+      used_window2 = isTRUE(outcome_rec$used_window2),
+      used_advice = isTRUE(outcome_rec$used_advice)
+    ),
+    outcome_recommendations = outcome_recommendations,
+    negative_control_enabled = isTRUE(negative_control_enabled),
+    negative_control_concept_set_id = json_int_or_null(negativeControlConceptSetId),
+    covariate_concept_sets_enabled = isTRUE(covariate_enabled),
+    covariate_include_all_concepts = isTRUE(include_all_covariates),
+    covariate_include_concept_set_id = json_int_or_null(includeCovariateConceptSetId),
+    covariate_exclude_concept_set_id = json_int_or_null(excludeCovariateConceptSetId),
+    target_name = target_name,
+    comparator_name = comparator_name,
+    outcome_names = as.list(outcome_names),
+    target_original_name = target_original_name,
+    comparator_original_name = comparator_original_name,
+    outcome_original_names = as.list(outcome_original_names),
+    target_analysis_label = target_name,
+    comparator_analysis_label = comparator_name,
+    outcome_analysis_labels = as.list(outcome_names),
+    target_description = target_desc,
+    comparator_description = comparator_desc,
+    outcome_descriptions = as.list(outcome_descs),
+    customized_sections = as.list(effective_analytic_settings$customized_sections),
+    analytic_settings_mode = analytic_settings_mode,
+    analytic_settings_selection_source = analytic_settings_selection_source,
+    analytic_settings_input_method = analytic_settings_input_method,
+    analytic_settings_description = json_string_or_null(analytic_settings_description),
+    analytic_settings_description_path = json_string_or_null(analytic_settings_description_path),
+    analytic_settings_recommendation_source = analytic_settings_recommendation_source,
+    analytic_settings_acp_response_path = json_string_or_null(analytic_settings_acp_response_path),
+    analytic_settings_recommendation_path = json_string_or_null(analytic_settings_recommendation_path),
+    analytic_settings_recommendation_status = analytic_settings_recommendation_status,
+    analytic_settings_confirmed = isTRUE(analytic_settings_confirmed),
+    analytic_settings_section_flow = as.list(analytic_settings_section_flow),
+    cm_analysis_json_path = cm_analysis_json_path,
+    cm_analysis_template_path = json_string_or_null(cm_analysis_template_path),
+    remap_cohort_ids = use_mapping,
+    cohort_id_base = cohortIdBase
+  )
+  manual_inputs$analytic_settings <- effective_analytic_settings
+  write_json(manual_inputs, manual_inputs_path)
+
+  improvements_applied <- any(vapply(improvements_results, function(x) isTRUE(x$applied), logical(1)))
+  if (isTRUE(improvements_applied)) {
+    # For every cohort id of a role: make sure the role's patched directory has
+    # a definition (falling back to the selected, unpatched one) and mirror it
+    # into the combined patched directory. Cohorts with no definition in either
+    # place are skipped.
+    ensure_complete_patched_role <- function(selected_role_dir, patched_role_dir, cohort_ids) {
+      # Copy one file and surface a failure as a warning instead of ignoring
+      # the FALSE returned by file.copy().
+      copy_cohort_file <- function(from, to) {
+        if (!isTRUE(file.copy(from, to, overwrite = TRUE))) {
+          warning(sprintf("Failed to copy cohort definition '%s' to '%s'.", from, to), call. = FALSE)
+        }
+      }
+      ensure_dir(patched_role_dir)
+      ensure_dir(patched_dir)
+      for (cid in as.integer(cohort_ids)) {
+        role_path <- file.path(patched_role_dir, sprintf("%s.json", cid))
+        selected_path <- file.path(selected_role_dir, sprintf("%s.json", cid))
+        combined_path <- file.path(patched_dir, sprintf("%s.json", cid))
+        if (!file.exists(role_path) && file.exists(selected_path)) {
+          copy_cohort_file(selected_path, role_path)
+        }
+        source_path <- if (file.exists(role_path)) role_path else selected_path
+        if (file.exists(source_path)) {
+          copy_cohort_file(source_path, combined_path)
+        }
+      }
+    }
+    ensure_complete_patched_role(selected_target_dir, patched_target_dir, new_target_id)
+    ensure_complete_patched_role(selected_comparator_dir, patched_comparator_dir, new_comparator_id)
+    ensure_complete_patched_role(selected_outcome_dir, patched_outcome_dir, new_outcome_ids)
+  }
+  improvements_cache_used <- vapply(improvements_results, function(x) isTRUE(x$used_cache), logical(1))
+  improvements_flow_called <- vapply(improvements_results, function(x) isTRUE(x$flow_called), logical(1))
+
+  # Mapping between the originally selected cohort ids and the ids used by the
+  # generated study, one row per cohort.
+  cohort_map <- data.frame(
+    original_id = c(selected_target_id, selected_comparator_id, selected_outcome_ids),
+    cohort_id = c(new_target_id, new_comparator_id, new_outcome_ids),
+    role = c("target", "comparator", rep("outcome", length(new_outcome_ids))),
+    cohort_name = c(target_name, comparator_name, outcome_names),
+    original_cohort_name = c(target_original_name, comparator_original_name, outcome_original_names),
+    short_description = c(target_desc, comparator_desc, outcome_descs),
+    stringsAsFactors = FALSE
+  )
+  write_json(list(mapping = cohort_map), cohort_id_map_path)
+
+  write_json(
+    list(
+      comparison_label = comparisonLabel,
+      targets = as.integer(new_target_id),
+      comparators = as.integer(new_comparator_id),
+      outcomes = as.integer(new_outcome_ids)
+    ),
+    cohort_roles_path
+  )
+
+  # Single target-vs-comparator comparison with one entry per outcome cohort.
+  cm_comparisons <- list(
+    comparisons = list(
+      list(
+        comparison_id = 1L,
+        label = comparisonLabel,
+        study_intent = studyIntent,
+        target = list(
+          source_id = as.integer(selected_target_id),
+          cohort_id = as.integer(new_target_id),
+          name = target_name,
+          original_name = target_original_name
+        ),
+        comparator = list(
+          source_id = as.integer(selected_comparator_id),
+          cohort_id = as.integer(new_comparator_id),
+          name = comparator_name,
+          original_name = comparator_original_name
+        ),
+        outcomes = lapply(seq_along(new_outcome_ids), function(i) {
+          list(
+            source_id = as.integer(selected_outcome_ids[[i]]),
+            cohort_id = as.integer(new_outcome_ids[[i]]),
+            name = outcome_names[[i]],
+            original_name = outcome_original_names[[i]],
+            statement = outcomeStatementsForSelectedCohorts[[i]]
+          )
+        })
+      )
+    )
+  )
+  write_json(cm_comparisons, cm_comparisons_path)
+
+  # Roll the per-role improvement statuses up into one overall status.
+  role_statuses <- vapply(improvements_results, function(x) as.character(x$status %||% "not_run"), character(1))
+  improvements_status_value <- if (length(role_statuses) == 0) {
+    # all() over an empty vector is TRUE; with no role results nothing ran, so
+    # do not report "completed".
+    "not_run"
+  } else if (all(role_statuses %in% c("completed", "not_applicable"))) {
+    "completed"
+  } else if (all(role_statuses %in% c("skipped", "not_applicable"))) {
+    "skipped"
+  } else if (any(role_statuses %in% c("completed", "completed_with_errors"))) {
+    "partial"
+  } else {
+    "not_run"
+  }
+  improvements_status <- list(
+    status = improvements_status_value,
+    flow = "phenotype_improvements",
+    applies_to = c("target", "comparator", "outcome"),
+    auto_apply = isTRUE(autoApplyImprovements),
+    applied = isTRUE(improvements_applied),
+    combined_patched_dir = patched_dir,
+    role_artifacts = list(
+      target = improvements_target_path,
+      comparator = improvements_comparator_path,
+      outcome = improvements_outcome_path
+    ),
+    roles = improvements_results
+  )
+  write_json(improvements_status, improvements_status_path)
+
+  cm_evaluation_todo <- list(
+    status = "todo",
+    items = list(
+      list(
+        name = "negative_controls",
+        status = if (isTRUE(negative_control_enabled)) "dummy_selected" else "todo",
+        enabled = isTRUE(negative_control_enabled),
+        concept_set_id = json_int_or_null(negativeControlConceptSetId),
+        source = json_string_or_null(if (isTRUE(negative_control_enabled)) "manual_shell" else NULL)
+      ),
+      list(name = "positive_control_synthesis", status = "todo"),
+      list(name = "empirical_calibration", status = "todo")
+    ),
+    note = "Current stage only scaffolds CohortMethod execution for outcomes of interest."
+  )
+  write_json(cm_evaluation_todo, cm_evaluation_todo_path)
+
+  # Write a placeholder concept set (empty expression) to `path` and return
+  # `path`; returns NULL without writing when no concept set id was supplied.
+  create_dummy_concept_set <- function(path, concept_set_id, label) {
+    if (is.null(concept_set_id)) return(NULL)
+    payload <- list(
+      conceptSetId = as.integer(concept_set_id),
+      name = sprintf("Dummy %s %s", label, concept_set_id),
+      expression = list(items = list()),
+      note = "Placeholder only. Replace this dummy concept set with real concept set content in a later stage."
+    )
+    write_json(payload, path)
+    path
+  }
+
+  negative_control_path <- create_dummy_concept_set(
+    file.path(concept_sets_dir, "negative_control_concept_set.json"),
+    negativeControlConceptSetId,
+    "negative control concept set"
+  )
+  covariate_include_path <- create_dummy_concept_set(
+    file.path(concept_sets_dir, "covariate_include_concept_set.json"),
+    includeCovariateConceptSetId,
+    "covariate include concept set"
+  )
+  covariate_exclude_path <- create_dummy_concept_set(
+    file.path(concept_sets_dir, "covariate_exclude_concept_set.json"),
+    excludeCovariateConceptSetId,
+    "covariate exclude concept set"
+  )
+
+  cm_concept_set_selections <- list(
+    negative_control = list(
+      enabled = isTRUE(negative_control_enabled),
+      concept_set_id = json_int_or_null(negativeControlConceptSetId),
+      artifact_path = json_string_or_null(negative_control_path),
+      status = if (isTRUE(negative_control_enabled)) "dummy_selected" else "not_selected"
+    ),
+    covariates = list(
+      enabled = isTRUE(covariate_enabled),
+      include_all_concepts = isTRUE(include_all_covariates),
+      include = list(
+        concept_set_id = json_int_or_null(includeCovariateConceptSetId),
+        artifact_path = json_string_or_null(covariate_include_path)
+      ),
+      exclude = list(
+        concept_set_id = json_int_or_null(excludeCovariateConceptSetId),
+        artifact_path = json_string_or_null(covariate_exclude_path)
+      ),
+      status = if (isTRUE(covariate_enabled)) "dummy_selected" else "not_selected"
+    ),
+    note = "Concept set IDs are manual placeholders in the current R-only stage."
+  )
+  write_json(cm_concept_set_selections, cm_concept_set_selections_path)
+
+  # Flattened view of the effective analytic settings for the defaults artifact.
+  cm_defaults <- list(
+    analysis_id = 1L,
+    description = effective_analytic_settings$profile_name,
+    profile_name = effective_analytic_settings$profile_name,
+    source = "manual_shell",
+    mode = analytic_settings_mode,
+    input_method = analytic_settings_input_method,
+    recommendation_path = json_string_or_null(analytic_settings_recommendation_path),
+    customized_sections = effective_analytic_settings$customized_sections,
+    get_db_cohort_method_data = effective_analytic_settings$get_db_cohort_method_data,
+    create_study_population = effective_analytic_settings$create_study_population,
+    create_ps = effective_analytic_settings$create_ps,
+    ps_adjustment = effective_analytic_settings$ps_adjustment,
+    match_on_ps = effective_analytic_settings$match_on_ps,
+    stratify_by_ps = effective_analytic_settings$stratify_by_ps,
+    fit_outcome_model = effective_analytic_settings$fit_outcome_model,
+    covariate_concept_sets = effective_analytic_settings$covariate_concept_sets
+  )
+  cm_defaults$covariate_concept_sets$enabled <- isTRUE(effective_analytic_settings$covariate_concept_sets$enabled)
+  cm_defaults$covariate_concept_sets$note <- "Placeholder only. Dummy concept set IDs are captured for future concept set materialization."
+ cm_defaults$get_db_cohort_method_data$removeDuplicateSubjects <- as.character(cm_defaults$get_db_cohort_method_data$removeDuplicateSubjects) + cm_defaults$create_study_population$removeDuplicateSubjects <- as.character(cm_defaults$create_study_population$removeDuplicateSubjects) + cm_defaults$cm_analysis_json_path <- cm_analysis_json_path + write_json(cm_defaults, cm_defaults_path) + + cm_analysis_template <- .studyAgentLoadCmAnalysisTemplate(cm_analysis_template_path) + cm_analysis_json <- .studyAgentBuildCmAnalysisJson( + settings = effective_analytic_settings, + template = cm_analysis_template + ) + write_json(cm_analysis_json, cm_analysis_json_path) + + cohort_rows <- list( + data.frame( + atlas_id = selected_target_id, + cohort_id = new_target_id, + cohort_name = target_name, + cohort_type = "target", + logic_description = if (nzchar(target_desc)) target_desc else "Manual target cohort selection", + generate_stats = TRUE, + stringsAsFactors = FALSE + ), + data.frame( + atlas_id = selected_comparator_id, + cohort_id = new_comparator_id, + cohort_name = comparator_name, + cohort_type = "comparator", + logic_description = if (nzchar(comparator_desc)) comparator_desc else "Manual comparator cohort selection", + generate_stats = TRUE, + stringsAsFactors = FALSE + ) + ) + if (length(new_outcome_ids) > 0) { + for (i in seq_along(new_outcome_ids)) { + cohort_rows[[length(cohort_rows) + 1]] <- data.frame( + atlas_id = selected_outcome_ids[[i]], + cohort_id = new_outcome_ids[[i]], + cohort_name = outcome_names[[i]], + cohort_type = "outcome", + logic_description = if (nzchar(outcome_descs[[i]])) outcome_descs[[i]] else "Manual outcome cohort selection", + generate_stats = TRUE, + stringsAsFactors = FALSE + ) + } + } + cohort_df <- do.call(rbind, cohort_rows) + cohort_csv <- file.path(selected_dir, "Cohorts.csv") + write.csv(cohort_df, cohort_csv, row.names = FALSE) + + state <- list( + study_intent = studyIntent, + target_statement = targetStatement, + 
comparator_statement = comparatorStatement, + outcome_statement = outcomeStatement, + outcome_statements = as.list(outcomeStatements), + outcome_cohort_statements = as.list(outcomeStatementsForSelectedCohorts), + comparison_label = comparisonLabel, + target_analysis_label = target_name, + comparator_analysis_label = comparator_name, + outcome_analysis_labels = as.list(outcome_names), + target_original_name = target_original_name, + comparator_original_name = comparator_original_name, + outcome_original_names = as.list(outcome_original_names), + output_dir = output_dir, + selected_dir = selected_dir, + patched_dir = patched_dir, + selected_target_dir = selected_target_dir, + selected_comparator_dir = selected_comparator_dir, + selected_outcome_dir = selected_outcome_dir, + patched_target_dir = patched_target_dir, + patched_comparator_dir = patched_comparator_dir, + patched_outcome_dir = patched_outcome_dir, + keeper_dir = keeper_dir, + analysis_settings_dir = analysis_settings_dir, + scripts_dir = scripts_dir, + cm_results_dir = cm_results_dir, + cm_diagnostics_dir = cm_diagnostics_dir, + cm_data_dir = cm_data_dir, + manual_intent_path = manual_intent_path, + manual_inputs_path = manual_inputs_path, + cohort_methods_intent_split_path = json_string_or_null(if (file.exists(cohort_methods_intent_split_path)) cohort_methods_intent_split_path else NULL), + cohort_id_map_path = cohort_id_map_path, + cohort_roles_path = cohort_roles_path, + cm_comparisons_path = cm_comparisons_path, + improvements_status_path = improvements_status_path, + improvements_target_path = improvements_target_path, + improvements_comparator_path = improvements_comparator_path, + improvements_outcome_path = improvements_outcome_path, + improvements_paths = list( + target = improvements_target_path, + comparator = improvements_comparator_path, + outcome = improvements_outcome_path, + status = improvements_status_path + ), + improvements_cache_used = as.list(improvements_cache_used), + 
improvements_flow_called = as.list(improvements_flow_called), + improvements_applied = isTRUE(improvements_applied), + improvements_auto_apply = isTRUE(autoApplyImprovements), + improvements_prompt_choices = list( + target = isTRUE(do_target_improvements), + comparator = isTRUE(do_comparator_improvements), + outcome = isTRUE(do_outcome_improvements) + ), + improvements_results = improvements_results, + cm_evaluation_todo_path = cm_evaluation_todo_path, + cm_defaults_path = cm_defaults_path, + cm_analysis_json_path = cm_analysis_json_path, + cm_analysis_template_path = json_string_or_null(cm_analysis_template_path), + cm_acp_specifications_recommendation_path = json_string_or_null(analytic_settings_acp_response_path), + cm_analytic_settings_recommendation_path = json_string_or_null(analytic_settings_recommendation_path), + cm_concept_set_selections_path = cm_concept_set_selections_path, + cohort_csv = cohort_csv, + used_cached_inputs = !is.null(cached_inputs), + cohort_methods_intent_split_source = cohort_methods_intent_split_source, + cohort_methods_intent_split_status = cohort_methods_intent_split_status, + explicit_cohort_ids_supplied = isTRUE(all_cohort_ids_from_function_args), + skip_intent_split_and_recommendation = isTRUE(skip_intent_split_and_recommendation), + skip_phenotype_improvements = isTRUE(skip_intent_split_and_recommendation), + skip_reason = json_string_or_null(if (isTRUE(skip_intent_split_and_recommendation)) "all_cohort_ids_provided" else NULL), + skip_prompt_source = if (isTRUE(all_cohort_ids_from_function_args) && isTRUE(interactive)) "interactive_user_choice" else "not_prompted", + use_function_argument_ids_for_selection = isTRUE(use_function_argument_ids_for_selection), + resume_enabled = isTRUE(resume), + remap_cohort_ids = use_mapping, + cohort_id_base = cohortIdBase, + analytic_settings_mode = analytic_settings_mode, + analytic_settings_selection_source = analytic_settings_selection_source, + analytic_settings_input_method = 
analytic_settings_input_method, + analytic_settings_description = json_string_or_null(analytic_settings_description), + analytic_settings_description_path = json_string_or_null(analytic_settings_description_path), + analytic_settings_recommendation_source = analytic_settings_recommendation_source, + analytic_settings_acp_response_path = json_string_or_null(analytic_settings_acp_response_path), + analytic_settings_recommendation_status = analytic_settings_recommendation_status, + analytic_settings_confirmed = isTRUE(analytic_settings_confirmed), + analytic_settings_section_flow = as.list(analytic_settings_section_flow), + analytic_settings_profile_name = effective_analytic_settings$profile_name, + analytic_settings_customized_sections = as.character(effective_analytic_settings$customized_sections), + analytic_settings = effective_analytic_settings, + negative_control_enabled = isTRUE(negative_control_enabled), + negative_control_concept_set_id = json_int_or_null(negativeControlConceptSetId), + covariate_concept_sets_enabled = isTRUE(covariate_enabled), + covariate_include_all_concepts = isTRUE(include_all_covariates), + covariate_include_concept_set_id = json_int_or_null(includeCovariateConceptSetId), + covariate_exclude_concept_set_id = json_int_or_null(excludeCovariateConceptSetId), + target_recommendation_path = json_string_or_null(target_rec$recommendation_path), + comparator_recommendation_path = json_string_or_null(comparator_rec$recommendation_path), + outcome_recommendation_path = json_string_or_null(outcome_rec$recommendation_path), + outcome_recommendation_paths = as.list(vapply( + outcome_recommendations, + function(rec) as.character(rec$path %||% NA_character_), + character(1) + )), + target_recommendation_source = target_rec$recommendation_source, + comparator_recommendation_source = comparator_rec$recommendation_source, + outcome_recommendation_source = outcome_rec$recommendation_source, + target_selection_source = target_rec$selection_source, + 
comparator_selection_source = comparator_rec$selection_source, + outcome_selection_source = outcome_rec$selection_source, + target_used_cached_recommendation = isTRUE(target_rec$used_cached_recommendation), + comparator_used_cached_recommendation = isTRUE(comparator_rec$used_cached_recommendation), + outcome_used_cached_recommendation = isTRUE(outcome_rec$used_cached_recommendation), + target_used_cached_selection = isTRUE(target_rec$used_cached_selection), + comparator_used_cached_selection = isTRUE(comparator_rec$used_cached_selection), + outcome_used_cached_selection = isTRUE(outcome_rec$used_cached_selection), + outcome_recommendations = outcome_recommendations, + target_ids = as.integer(new_target_id), + comparator_ids = as.integer(new_comparator_id), + outcome_ids = as.integer(new_outcome_ids) + ) + write_json(state, state_path) + + package_root <- resolve_path("R/OHDSIAssistant", study_base_dir) + if (!dir.exists(package_root)) { + alt <- file.path(getwd(), "R", "OHDSIAssistant") + if (dir.exists(alt)) package_root <- alt + } + package_root <- normalizePath(package_root, winslash = "/", mustWork = FALSE) + + script_header <- c( + "# Generated by OHDSIAssistant::runStrategusCohortMethodsShell", + "# Edit values as needed and run in order.", + "# Current stage: manual shell output with ACP/MCP status artifacts.", + "" + ) + package_loader_lines <- c( + sprintf("package_root <- '%s'", package_root), + "if (!requireNamespace('OHDSIAssistant', quietly = TRUE)) {", + " if (requireNamespace('devtools', quietly = TRUE) && dir.exists(package_root)) {", + " devtools::load_all(package_root)", + " } else {", + " stop('OHDSIAssistant is not installed and devtools::load_all(package_root) is unavailable: ', package_root)", + " }", + "}", + "library(OHDSIAssistant)" + ) + + script_02 <- c( + script_header, + "library(jsonlite)", + "`%||%` <- function(x, y) if (is.null(x)) y else x", + "", + sprintf("base_dir <- '%s'", base_dir), + "output_dir <- file.path(base_dir, 
'outputs')", + "selected_dir <- file.path(base_dir, 'selected-cohorts')", + "selected_target_dir <- file.path(base_dir, 'selected-target-cohorts')", + "selected_comparator_dir <- file.path(base_dir, 'selected-comparator-cohorts')", + "selected_outcome_dir <- file.path(base_dir, 'selected-outcome-cohorts')", + "patched_dir <- file.path(base_dir, 'patched-cohorts')", + "patched_target_dir <- file.path(base_dir, 'patched-target-cohorts')", + "patched_comparator_dir <- file.path(base_dir, 'patched-comparator-cohorts')", + "patched_outcome_dir <- file.path(base_dir, 'patched-outcome-cohorts')", + "dir.create(patched_dir, recursive = TRUE, showWarnings = FALSE)", + "for (dir_path in c(patched_dir, patched_target_dir, patched_comparator_dir, patched_outcome_dir)) {", + " if (dir.exists(dir_path)) unlink(list.files(dir_path, pattern = '\\\\.(json)$', full.names = TRUE), force = TRUE)", + " sql_dir <- file.path(dir_path, 'sql')", + " if (dir.exists(sql_dir)) unlink(list.files(sql_dir, pattern = '\\\\.(sql)$', full.names = TRUE), force = TRUE)", + "}", + "", + "apply_action <- function(obj, action) {", + " path <- action$path %||% ''", + " value <- action$value", + " if (!nzchar(path)) return(obj)", + " segs <- strsplit(path, '/', fixed = TRUE)[[1]]", + " segs <- segs[segs != '']", + " set_in <- function(x, segs, value) {", + " if (length(segs) == 0) return(value)", + " seg <- segs[[1]]", + " name <- seg", + " idx <- NA_integer_", + " if (grepl('\\\\[\\\\d+\\\\]$', seg)) {", + " name <- sub('\\\\[\\\\d+\\\\]$', '', seg)", + " idx <- as.integer(sub('^.*\\\\[(\\\\d+)\\\\]$', '\\\\1', seg))", + " }", + " if (name != '') {", + " if (is.null(x[[name]])) x[[name]] <- list()", + " if (length(segs) == 1) {", + " if (!is.na(idx)) {", + " while (length(x[[name]]) < idx) x[[name]][[length(x[[name]]) + 1]] <- list()", + " x[[name]][[idx]] <- value", + " } else {", + " x[[name]] <- value", + " }", + " return(x)", + " }", + " if (!is.na(idx)) {", + " while (length(x[[name]]) < idx) 
x[[name]][[length(x[[name]]) + 1]] <- list()", + " x[[name]][[idx]] <- set_in(x[[name]][[idx]], segs[-1], value)", + " } else {", + " x[[name]] <- set_in(x[[name]], segs[-1], value)", + " }", + " return(x)", + " }", + " idx <- suppressWarnings(as.integer(seg))", + " if (is.na(idx)) return(x)", + " if (idx == 0) idx <- 1", + " while (length(x) < idx) x[[length(x) + 1]] <- list()", + " if (length(segs) == 1) {", + " x[[idx]] <- value", + " return(x)", + " }", + " x[[idx]] <- set_in(x[[idx]], segs[-1], value)", + " x", + " }", + " set_in(obj, segs, value)", + "}", + "is_mutating_improvement_action <- function(action) {", + " action_type <- tolower(trimws(as.character(action$type %||% 'note')))", + " action_type %in% c('set', 'replace', 'update')", + "}", + "", + "apply_for_role <- function(improvements_path, selected_role_dir, patched_role_dir) {", + " if (!file.exists(improvements_path)) return(invisible(FALSE))", + " improvements <- jsonlite::fromJSON(improvements_path, simplifyVector = FALSE)", + " dir.create(patched_role_dir, recursive = TRUE, showWarnings = FALSE)", + " applied <- FALSE", + " for (cid in names(improvements)) {", + " resp <- improvements[[cid]]", + " core <- resp$full_result %||% resp", + " items <- core$phenotype_improvements %||% list()", + " selected_path <- file.path(selected_role_dir, sprintf('%s.json', cid))", + " if (!file.exists(selected_path)) next", + " cohort_obj <- jsonlite::fromJSON(selected_path, simplifyVector = FALSE)", + " mutation_count <- 0L", + " if (length(items) > 0) {", + " for (item in items) {", + " if (is.null(item$actions)) next", + " for (act in item$actions) {", + " if (is_mutating_improvement_action(act)) {", + " cohort_obj <- apply_action(cohort_obj, act)", + " mutation_count <- mutation_count + 1L", + " }", + " }", + " }", + " }", + " if (mutation_count == 0L) next", + " applied <- TRUE", + " out_path <- file.path(patched_role_dir, sprintf('%s.json', cid))", + " jsonlite::write_json(cohort_obj, out_path, pretty = 
TRUE, auto_unbox = TRUE, na = 'null')", + " file.copy(out_path, file.path(patched_dir, sprintf('%s.json', cid)), overwrite = TRUE)", + " }", + " invisible(applied)", + "}", + "", + "apply_for_role(file.path(output_dir, 'improvements_target.json'), selected_target_dir, patched_target_dir)", + "apply_for_role(file.path(output_dir, 'improvements_comparator.json'), selected_comparator_dir, patched_comparator_dir)", + "apply_for_role(file.path(output_dir, 'improvements_outcome.json'), selected_outcome_dir, patched_outcome_dir)", + "for (path in list.files(selected_dir, pattern = '\\\\.(json)$', full.names = TRUE)) {", + " dest <- file.path(patched_dir, basename(path))", + " if (!file.exists(dest)) file.copy(path, dest, overwrite = TRUE)", + "}", + "" + ) + write_lines(file.path(scripts_dir, "02_apply_improvements.R"), script_02) + + script_03 <- c( + script_header, + "library(Strategus)", + "library(CohortGenerator)", + "library(DatabaseConnector)", + "library(dplyr)", + "library(CirceR)", + "library(SqlRender)", + "", + package_loader_lines, + "library(jsonlite)", + "library(ParallelLogger)", + "`%||%` <- function(x, y) if (is.null(x)) y else x", + "", + sprintf("base_dir <- '%s'", base_dir), + "selected_dir <- file.path(base_dir, 'selected-cohorts')", + "patched_dir <- file.path(base_dir, 'patched-cohorts')", + "cohort_csv <- file.path(selected_dir, 'Cohorts.csv')", + "cohort_json_dir <- if (length(list.files(patched_dir, pattern = '\\\\.(json)$')) > 0) patched_dir else selected_dir", + "sql_dir <- file.path(cohort_json_dir, 'sql')", + "dir.create(sql_dir, recursive = TRUE, showWarnings = FALSE)", + "", + "db_details_path <- file.path(base_dir, 'strategus-db-details.json')", + "execution_settings_path <- file.path(base_dir, 'strategus-execution-settings.json')", + "connectionDetails <- OHDSIAssistant::createStrategusConnectionDetails(path = db_details_path)", + "dbms <- connectionDetails$dbms %||% 'postgresql'", + "exec <- 
OHDSIAssistant::createStrategusExecutionSettings(path = execution_settings_path)", + "executionSettings_cohorts <- exec$executionSettings", + "cdmDatabaseSchema <- exec$cdmDatabaseSchema", + "workDatabaseSchema <- exec$workDatabaseSchema", + "resultsDatabaseSchema <- exec$resultsDatabaseSchema", + "vocabularyDatabaseSchema <- exec$vocabularyDatabaseSchema", + "cohortTable <- exec$cohortTable", + "cohortIdFieldName <- exec$cohortIdFieldName", + "dir.create(exec$workFolder, recursive = TRUE, showWarnings = FALSE)", + "dir.create(exec$resultsFolder, recursive = TRUE, showWarnings = FALSE)", + "", + "cohort_settings <- read.csv(cohort_csv, stringsAsFactors = FALSE)", + "if (nrow(cohort_settings) > 0) {", + " id_col <- if ('cohort_id' %in% names(cohort_settings)) 'cohort_id' else 'cohortId'", + " for (i in seq_len(nrow(cohort_settings))) {", + " cohort_id <- cohort_settings[[id_col]][i]", + " sql_path <- file.path(sql_dir, sprintf('%s.sql', cohort_id))", + " if (!file.exists(sql_path)) {", + " json_path <- file.path(cohort_json_dir, sprintf('%s.json', cohort_id))", + " if (!file.exists(json_path)) stop('Missing cohort JSON: ', json_path)", + " json_text <- readChar(json_path, nchars = file.info(json_path)$size, useBytes = TRUE)", + " cohort_expression <- CirceR::cohortExpressionFromJson(json_text)", + " generateOptions <- CirceR::createGenerateOptions(", + " cohortIdFieldName = cohortIdFieldName,", + " cdmSchema = cdmDatabaseSchema,", + " targetTable = paste0(workDatabaseSchema, '.', cohortTable),", + " resultSchema = resultsDatabaseSchema,", + " vocabularySchema = vocabularyDatabaseSchema,", + " generateStats = TRUE", + " )", + " sql <- CirceR::buildCohortQuery(cohort_expression, generateOptions)", + " sql <- SqlRender::render(sql)", + " sql <- SqlRender::translate(sql, targetDialect = dbms)", + " writeLines(sql, sql_path, useBytes = TRUE)", + " }", + " }", + "}", + "", + "cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet(", + " settingsFileName = 
cohort_csv,", + " jsonFolder = cohort_json_dir,", + " sqlFolder = sql_dir", + ")", + "", + "cgModule <- CohortGeneratorModule$new()", + "cohortDefinitionSharedResource <- cgModule$createCohortSharedResourceSpecifications(", + " cohortDefinitionSet = cohortDefinitionSet", + ")", + "cohortGeneratorModuleSpecifications <- cgModule$createModuleSpecifications(generateStats = TRUE)", + "", + "analysisSpecifications <- createEmptyAnalysisSpecifications() %>%", + " addSharedResources(cohortDefinitionSharedResource) %>%", + " addModuleSpecifications(cohortGeneratorModuleSpecifications)", + "", + "execute(", + " analysisSpecifications = analysisSpecifications,", + " executionSettings = executionSettings_cohorts,", + " connectionDetails = connectionDetails", + ")", + "" + ) + write_lines(file.path(scripts_dir, "03_generate_cohorts.R"), script_03) + + script_04 <- c( + script_header, + "library(Keeper)", + "library(jsonlite)", + "library(DatabaseConnector)", + "", + package_loader_lines, + "", + sprintf("base_dir <- '%s'", base_dir), + "output_dir <- file.path(base_dir, 'outputs')", + "keeper_dir <- file.path(base_dir, 'keeper-case-review')", + "dir.create(keeper_dir, recursive = TRUE, showWarnings = FALSE)", + "id_map <- jsonlite::fromJSON(file.path(output_dir, 'cohort_id_map.json'), simplifyVector = TRUE)$mapping", + "db_details_path <- file.path(base_dir, 'strategus-db-details.json')", + "execution_settings_path <- file.path(base_dir, 'strategus-execution-settings.json')", + "connectionDetails <- OHDSIAssistant::createStrategusConnectionDetails(path = db_details_path)", + "exec <- OHDSIAssistant::createStrategusExecutionSettings(path = execution_settings_path)", + "databaseId <- ''", + "cdmDatabaseSchema <- exec$cdmDatabaseSchema", + "cohortDatabaseSchema <- exec$workDatabaseSchema", + "cohortTable <- exec$cohortTable", + "", + "# TODO: Replace these placeholder concept vectors with study-specific Keeper settings.", + "keeperConcepts <- list(", + " doi = integer(0),", + " 
symptoms = integer(0),", + " comorbidities = integer(0),", + " drugs = integer(0),", + " diagnosticProcedures = integer(0),", + " measurements = integer(0),", + " alternativeDiagnosis = integer(0),", + " treatmentProcedures = integer(0),", + " complications = integer(0)", + ")", + "", + "for (i in seq_len(nrow(id_map))) {", + " cid <- id_map$cohort_id[i]", + " role <- id_map$role[i]", + " cohort_name <- id_map$cohort_name[i]", + " role_dir <- file.path(keeper_dir, role)", + " dir.create(role_dir, recursive = TRUE, showWarnings = FALSE)", + " keeper <- createKeeper(", + " connectionDetails = connectionDetails,", + " databaseId = databaseId,", + " cdmDatabaseSchema = cdmDatabaseSchema,", + " cohortDatabaseSchema = cohortDatabaseSchema,", + " cohortTable = cohortTable,", + " cohortDefinitionId = cid,", + " cohortName = cohort_name,", + " sampleSize = 100,", + " assignNewId = TRUE,", + " useAncestor = TRUE,", + " doi = keeperConcepts$doi,", + " symptoms = keeperConcepts$symptoms,", + " comorbidities = keeperConcepts$comorbidities,", + " drugs = keeperConcepts$drugs,", + " diagnosticProcedures = keeperConcepts$diagnosticProcedures,", + " measurements = keeperConcepts$measurements,", + " alternativeDiagnosis = keeperConcepts$alternativeDiagnosis,", + " treatmentProcedures = keeperConcepts$treatmentProcedures,", + " complications = keeperConcepts$complications", + " )", + " out_path <- file.path(role_dir, sprintf('%s.csv', cid))", + " write.csv(keeper, out_path, row.names = FALSE)", + "}", + "", + "# TODO: When ACP is implemented for cohort methods, add optional LLM-based Keeper row review here.", + "" + ) + write_lines(file.path(scripts_dir, "04_keeper_review.R"), script_04) + + script_05 <- c( + script_header, + "library(Strategus)", + "library(CohortDiagnostics)", + "library(CohortGenerator)", + "library(DatabaseConnector)", + "library(dplyr)", + "", + package_loader_lines, + "", + sprintf("base_dir <- '%s'", base_dir), + "selected_dir <- file.path(base_dir, 
'selected-cohorts')", + "patched_dir <- file.path(base_dir, 'patched-cohorts')", + "cohort_csv <- file.path(selected_dir, 'Cohorts.csv')", + "cohort_json_dir <- if (length(list.files(patched_dir, pattern = '\\\\.(json)$')) > 0) patched_dir else selected_dir", + "sql_dir <- file.path(cohort_json_dir, 'sql')", + "dir.create(sql_dir, recursive = TRUE, showWarnings = FALSE)", + "", + "db_details_path <- file.path(base_dir, 'strategus-db-details.json')", + "execution_settings_path <- file.path(base_dir, 'strategus-execution-settings.json')", + "connectionDetails <- OHDSIAssistant::createStrategusConnectionDetails(path = db_details_path)", + "exec <- OHDSIAssistant::createStrategusExecutionSettings(path = execution_settings_path)", + "executionSettings_diagnostics <- exec$executionSettings", + "", + "cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet(", + " settingsFileName = cohort_csv,", + " jsonFolder = cohort_json_dir,", + " sqlFolder = sql_dir", + ")", + "", + "cgModule <- CohortGeneratorModule$new()", + "cohortDefinitionSharedResource <- cgModule$createCohortSharedResourceSpecifications(", + " cohortDefinitionSet = cohortDefinitionSet", + ")", + "", + "cdModule <- CohortDiagnosticsModule$new()", + "cohortDiagnosticsModuleSpecifications <- cdModule$createModuleSpecifications(", + " runInclusionStatistics = TRUE,", + " runIncludedSourceConcepts = TRUE,", + " runOrphanConcepts = TRUE,", + " runTimeSeries = FALSE,", + " runVisitContext = TRUE,", + " runBreakdownIndexEvents = TRUE,", + " runIncidenceRate = TRUE,", + " runCohortRelationship = TRUE,", + " runTemporalCohortCharacterization = TRUE", + ")", + "", + "analysisSpecifications <- createEmptyAnalysisSpecifications() %>%", + " addSharedResources(cohortDefinitionSharedResource) %>%", + " addModuleSpecifications(cohortDiagnosticsModuleSpecifications)", + "", + "execute(", + " analysisSpecifications = analysisSpecifications,", + " executionSettings = executionSettings_diagnostics,", + " connectionDetails = 
connectionDetails", + ")", + "" + ) + write_lines(file.path(scripts_dir, "05_diagnostics.R"), script_05) + + script_06 <- c( + script_header, + "library(Strategus)", + "library(CohortGenerator)", + "library(CohortIncidence)", + "library(jsonlite)", + "library(ParallelLogger)", + "", + package_loader_lines, + "", + sprintf("base_dir <- '%s'", base_dir), + "output_dir <- file.path(base_dir, 'outputs')", + "analysis_settings_dir <- file.path(base_dir, 'analysis-settings')", + "selected_dir <- file.path(base_dir, 'selected-cohorts')", + "patched_dir <- file.path(base_dir, 'patched-cohorts')", + "dir.create(analysis_settings_dir, recursive = TRUE, showWarnings = FALSE)", + "", + "`%||%` <- function(x, y) if (is.null(x)) y else x", + "defaults <- jsonlite::fromJSON(file.path(output_dir, 'cm_analysis_defaults.json'), simplifyVector = TRUE)", + "conceptSetSelections <- jsonlite::fromJSON(file.path(output_dir, 'cm_concept_set_selections.json'), simplifyVector = FALSE)", + "cohort_csv <- file.path(selected_dir, 'Cohorts.csv')", + "cohort_json_dir <- if (length(list.files(patched_dir, pattern = '\\\\.(json)$')) > 0) patched_dir else selected_dir", + "sql_dir <- file.path(cohort_json_dir, 'sql')", + "dir.create(sql_dir, recursive = TRUE, showWarnings = FALSE)", + "getDbDefaults <- defaults$get_db_cohort_method_data", + "studyPopulationDefaults <- defaults$create_study_population", + "psDefaults <- defaults$create_ps", + "psAdjustmentDefaults <- defaults$ps_adjustment %||% list()", + "matchDefaults <- defaults$match_on_ps", + "stratifyDefaults <- defaults$stratify_by_ps %||% list()", + "outcomeModelDefaults <- defaults$fit_outcome_model", + "covariateConceptDefaults <- defaults$covariate_concept_sets %||% list()", + "comparison_payload <- jsonlite::fromJSON(file.path(output_dir, 'cm_comparisons.json'), simplifyVector = FALSE)", + "comparisons <- comparison_payload$comparisons %||% list()", + "if (length(comparisons) == 0) stop('No comparisons found in cm_comparisons.json')", + 
"comparison <- comparisons[[1]]", + "", + "cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet(", + " settingsFileName = cohort_csv,", + " jsonFolder = cohort_json_dir,", + " sqlFolder = sql_dir", + ")", + "lookup_cohort_name <- function(cohort_id, fallback = NULL) {", + " row <- cohortDefinitionSet[as.integer(cohortDefinitionSet$cohortId) == as.integer(cohort_id), , drop = FALSE]", + " if (nrow(row) > 0 && 'cohortName' %in% names(row) && nzchar(as.character(row$cohortName[1]))) {", + " return(as.character(row$cohortName[1]))", + " }", + " fallback %||% sprintf('Cohort %s', cohort_id)", + "}", + "to_ci_anchor <- function(anchor) {", + " anchor <- tolower(trimws(as.character(anchor %||% 'cohort start')))", + " if (identical(anchor, 'cohort end')) 'end' else 'start'", + "}", + "", + "analyticSettingsProfile <- defaults$profile_name %||% 'Analytic Setting 1'", + "psAdjustmentStrategy <- psAdjustmentDefaults$strategy %||% 'match_on_ps'", + "psTrimmingStrategy <- psAdjustmentDefaults$trimmingStrategy %||% 'none'", + "psTrimmingPercent <- as.numeric(psAdjustmentDefaults$trimmingPercent %||% 5)", + "if (is.na(psTrimmingPercent)) psTrimmingPercent <- 5", + "equipoiseLowerBound <- as.numeric(psAdjustmentDefaults$equipoiseLowerBound %||% 0.25)", + "equipoiseUpperBound <- as.numeric(psAdjustmentDefaults$equipoiseUpperBound %||% 0.75)", + "if (is.na(equipoiseLowerBound)) equipoiseLowerBound <- 0.25", + "if (is.na(equipoiseUpperBound)) equipoiseUpperBound <- 0.75", + "matchMaxRatio <- as.integer(matchDefaults$maxRatio %||% 1L)", + "if (is.na(matchMaxRatio)) matchMaxRatio <- 1L", + "derivedOutcomeStratified <- if (identical(psAdjustmentStrategy, 'stratify_by_ps')) {", + " TRUE", + "} else if (identical(psAdjustmentStrategy, 'match_on_ps')) {", + " matchMaxRatio != 1L", + "} else {", + " FALSE", + "}", + "", + "target_id <- as.numeric(comparison$target$cohort_id %||% NA_real_)", + "comparator_id <- as.numeric(comparison$comparator$cohort_id %||% NA_real_)", + 
"outcome_ids <- vapply(comparison$outcomes %||% list(), function(x) as.numeric(x$cohort_id %||% NA_real_), numeric(1))", + "if (is.na(target_id)) stop('Missing target cohort ID in cm_comparisons.json')", + "if (is.na(comparator_id)) stop('Missing comparator cohort ID in cm_comparisons.json')", + "if (length(outcome_ids) == 0) stop('Missing outcome cohort IDs in cm_comparisons.json')", + "target_name <- lookup_cohort_name(target_id, comparison$target$name %||% 'Target')", + "comparator_name <- lookup_cohort_name(comparator_id, comparison$comparator$name %||% 'Comparator')", + "outcome_names <- vapply(comparison$outcomes %||% list(), function(x) {", + " oid <- as.numeric(x$cohort_id %||% NA_real_)", + " lookup_cohort_name(oid, x$name %||% sprintf('Outcome %s', oid))", + "}, character(1))", + "", + "negativeControlConceptSet <- conceptSetSelections$negative_control %||% list()", + "covariateConceptSelections <- conceptSetSelections$covariates %||% list()", + "includedConceptSetId <- as.integer(covariateConceptDefaults$include_concept_set_id %||% covariateConceptSelections$include$concept_set_id %||% NA_integer_)", + "excludedConceptSetId <- as.integer(covariateConceptDefaults$exclude_concept_set_id %||% covariateConceptSelections$exclude$concept_set_id %||% NA_integer_)", + "includedCovariateConceptIds <- numeric(0)", + "excludedCovariateConceptIds <- numeric(0)", + "if (!is.na(includedConceptSetId)) message('TODO: Replace dummy covariate include concept set ', includedConceptSetId, ' with actual concept IDs before production use.')", + "if (!is.na(excludedConceptSetId)) message('TODO: Replace dummy covariate exclude concept set ', excludedConceptSetId, ' with actual concept IDs before production use.')", + "if (isTRUE(negativeControlConceptSet$enabled %||% FALSE)) message('TODO: Negative control concept set selected as dummy placeholder: ', negativeControlConceptSet$concept_set_id %||% NA_integer_)", + "", + "# Shared cohort definitions are included so downstream 
modules can resolve cohort metadata.", + "# Cohort generation itself is intentionally not included here; run 03_generate_cohorts.R first.", + "cgModule <- CohortGeneratorModule$new()", + "cohortDefinitionSharedResource <- cgModule$createCohortSharedResourceSpecifications(", + " cohortDefinitionSet = cohortDefinitionSet", + ")", + "", + "# Characterization module: one characterization configuration for target and comparator cohorts.", + "characterizationTargetIds <- as.numeric(unique(c(target_id, comparator_id)))", + "characterizationModule <- CharacterizationModule$new()", + "characterizationModuleSpecifications <- characterizationModule$createModuleSpecifications(", + " targetIds = characterizationTargetIds,", + " outcomeIds = as.numeric(outcome_ids),", + " limitToFirstInNDays = as.numeric(rep(if (isTRUE(getDbDefaults$firstExposureOnly %||% TRUE)) 99999 else 0, length(characterizationTargetIds))),", + " minPriorObservation = as.numeric(getDbDefaults$washoutPeriod %||% 0),", + " outcomeWashoutDays = as.numeric(rep(as.numeric(studyPopulationDefaults$priorOutcomeLookback %||% 99999), length(outcome_ids))),", + " riskWindowStart = as.numeric(studyPopulationDefaults$riskWindowStart %||% 0),", + " startAnchor = studyPopulationDefaults$startAnchor %||% 'cohort start',", + " riskWindowEnd = as.numeric(studyPopulationDefaults$riskWindowEnd %||% 0),", + " endAnchor = studyPopulationDefaults$endAnchor %||% 'cohort end',", + " mode = 'CohortIncidence'", + ")", + "", + "# CohortIncidence module: one incidence analysis across target/comparator cohorts and outcomes.", + "ciTargets <- list(", + " CohortIncidence::createCohortRef(id = target_id, name = target_name),", + " CohortIncidence::createCohortRef(id = comparator_id, name = comparator_name)", + ")", + "ciOutcomes <- lapply(seq_along(outcome_ids), function(i) {", + " CohortIncidence::createOutcomeDef(", + " id = as.numeric(outcome_ids[[i]]),", + " name = outcome_names[[i]],", + " cohortId = as.numeric(outcome_ids[[i]]),", + 
" cleanWindow = as.numeric(studyPopulationDefaults$priorOutcomeLookback %||% 99999)", + " )", + "})", + "ciTar <- CohortIncidence::createTimeAtRiskDef(", + " id = 1,", + " startWith = to_ci_anchor(studyPopulationDefaults$startAnchor %||% 'cohort start'),", + " startOffset = as.numeric(studyPopulationDefaults$riskWindowStart %||% 0),", + " endWith = to_ci_anchor(studyPopulationDefaults$endAnchor %||% 'cohort end'),", + " endOffset = as.numeric(studyPopulationDefaults$riskWindowEnd %||% 0)", + ")", + "ciAnalysis <- CohortIncidence::createIncidenceAnalysis(", + " targets = c(target_id, comparator_id),", + " outcomes = outcome_ids,", + " tars = c(1)", + ")", + "ciDesign <- CohortIncidence::createIncidenceDesign(", + " targetDefs = ciTargets,", + " outcomeDefs = ciOutcomes,", + " tars = list(ciTar),", + " analysisList = list(ciAnalysis),", + " strataSettings = CohortIncidence::createStrataSettings(byYear = TRUE, byGender = TRUE)", + ")", + "ciModule <- CohortIncidenceModule$new()", + "cohortIncidenceModuleSpecifications <- ciModule$createModuleSpecifications(", + " irDesign = ciDesign$toList()", + ")", + "", + "# CohortMethod module: one comparative analysis for the selected target/comparator/outcomes.", + "priorOutcomeLookback <- studyPopulationDefaults$priorOutcomeLookback %||% 99999L", + "riskWindowStart <- studyPopulationDefaults$riskWindowStart %||% 0L", + "startAnchor <- studyPopulationDefaults$startAnchor %||% 'cohort start'", + "riskWindowEnd <- studyPopulationDefaults$riskWindowEnd %||% 0L", + "endAnchor <- studyPopulationDefaults$endAnchor %||% 'cohort end'", + "outcomes <- lapply(outcome_ids, function(outcome_id) {", + " CohortMethod::createOutcome(", + " outcomeId = outcome_id,", + " outcomeOfInterest = TRUE,", + " priorOutcomeLookback = priorOutcomeLookback,", + " riskWindowStart = riskWindowStart,", + " startAnchor = startAnchor,", + " riskWindowEnd = riskWindowEnd,", + " endAnchor = endAnchor", + " )", + "})", + "", + "targetComparatorOutcomesList <- 
list(", + " CohortMethod::createTargetComparatorOutcomes(", + " targetId = target_id,", + " comparatorId = comparator_id,", + " outcomes = outcomes,", + " excludedCovariateConceptIds = excludedCovariateConceptIds,", + " includedCovariateConceptIds = includedCovariateConceptIds", + " )", + ")", + "", + "covariateSettings <- FeatureExtraction::createDefaultCovariateSettings()", + "getDbCohortMethodDataArgs <- CohortMethod::createGetDbCohortMethodDataArgs(", + " removeDuplicateSubjects = getDbDefaults$removeDuplicateSubjects,", + " firstExposureOnly = getDbDefaults$firstExposureOnly,", + " washoutPeriod = getDbDefaults$washoutPeriod,", + " restrictToCommonPeriod = getDbDefaults$restrictToCommonPeriod,", + " studyStartDate = getDbDefaults$studyStartDate %||% '',", + " studyEndDate = getDbDefaults$studyEndDate %||% '',", + " maxCohortSize = studyPopulationDefaults$maxCohortSize %||% getDbDefaults$maxCohortSize %||% 0,", + " covariateSettings = covariateSettings", + ")", + "createStudyPopulationArgs <- CohortMethod::createCreateStudyPopulationArgs(", + " removeSubjectsWithPriorOutcome = studyPopulationDefaults$removeSubjectsWithPriorOutcome,", + " priorOutcomeLookback = studyPopulationDefaults$priorOutcomeLookback,", + " minDaysAtRisk = studyPopulationDefaults$minDaysAtRisk,", + " riskWindowStart = studyPopulationDefaults$riskWindowStart,", + " startAnchor = studyPopulationDefaults$startAnchor,", + " riskWindowEnd = studyPopulationDefaults$riskWindowEnd,", + " endAnchor = studyPopulationDefaults$endAnchor,", + " censorAtNewRiskWindow = studyPopulationDefaults$censorAtNewRiskWindow", + ")", + "psPrior <- if (isTRUE(psDefaults$useRegularization %||% TRUE)) {", + " Cyclops::createPrior(priorType = 'laplace', exclude = c(0), useCrossValidation = TRUE)", + "} else {", + " Cyclops::createPrior(priorType = 'none')", + "}", + "createPsArgs <- if (identical(psAdjustmentStrategy, 'none') && identical(psTrimmingStrategy, 'none')) NULL else CohortMethod::createCreatePsArgs(", + " 
estimator = psDefaults$estimator,", + " maxCohortSizeForFitting = psDefaults$maxCohortSizeForFitting,", + " errorOnHighCorrelation = isTRUE(psDefaults$errorOnHighCorrelation %||% FALSE),", + " prior = psPrior", + ")", + "trimByPsArgs <- if (identical(psTrimmingStrategy, 'by_percent')) {", + " CohortMethod::createTrimByPsArgs(", + " trimFraction = psTrimmingPercent / 100,", + " trimMethod = 'symmetric'", + " )", + "} else if (identical(psTrimmingStrategy, 'by_equipoise')) {", + " CohortMethod::createTrimByPsArgs(", + " equipoiseBounds = c(equipoiseLowerBound, equipoiseUpperBound)", + " )", + "} else {", + " NULL", + "}", + "matchOnPsArgs <- if (identical(psAdjustmentStrategy, 'match_on_ps')) CohortMethod::createMatchOnPsArgs(", + " caliper = matchDefaults$caliper,", + " caliperScale = matchDefaults$caliperScale,", + " maxRatio = matchDefaults$maxRatio", + ") else NULL", + "stratifyByPsArgs <- if (identical(psAdjustmentStrategy, 'stratify_by_ps')) CohortMethod::createStratifyByPsArgs(", + " numberOfStrata = stratifyDefaults$numberOfStrata,", + " baseSelection = stratifyDefaults$baseSelection", + ") else NULL", + "outcomeModelPrior <- if (isTRUE(outcomeModelDefaults$useRegularization %||% TRUE)) {", + " Cyclops::createPrior(priorType = 'laplace', useCrossValidation = TRUE)", + "} else {", + " Cyclops::createPrior(priorType = 'none')", + "}", + "fitOutcomeModelArgs <- CohortMethod::createFitOutcomeModelArgs(", + " modelType = outcomeModelDefaults$modelType,", + " stratified = outcomeModelDefaults$stratified %||% derivedOutcomeStratified,", + " useCovariates = isTRUE(outcomeModelDefaults$useCovariates %||% FALSE),", + " inversePtWeighting = isTRUE(outcomeModelDefaults$inversePtWeighting %||% FALSE),", + " prior = outcomeModelPrior", + ")", + "", + "cmAnalysisList <- list(", + " CohortMethod::createCmAnalysis(", + " analysisId = as.integer(defaults$analysis_id %||% 1L),", + " description = analyticSettingsProfile %||% comparison$label %||% 'Default cohort method 
analysis',", + " getDbCohortMethodDataArgs = getDbCohortMethodDataArgs,", + " createStudyPopulationArgs = createStudyPopulationArgs,", + " createPsArgs = createPsArgs,", + " trimByPsArgs = trimByPsArgs,", + " matchOnPsArgs = matchOnPsArgs,", + " stratifyByPsArgs = stratifyByPsArgs,", + " fitOutcomeModelArgs = fitOutcomeModelArgs", + " )", + ")", + "cmAnalysesSpecifications <- CohortMethod::createCmAnalysesSpecifications(", + " cmAnalysisList = cmAnalysisList,", + " targetComparatorOutcomesList = targetComparatorOutcomesList,", + " analysesToExclude = NULL,", + " refitPsForEveryOutcome = FALSE,", + " refitPsForEveryStudyPopulation = TRUE,", + " cmDiagnosticThresholds = CohortMethod::createCmDiagnosticThresholds()", + ")", + "cmModule <- CohortMethodModule$new()", + "cohortMethodModuleSpecifications <- cmModule$createModuleSpecifications(", + " cmAnalysesSpecifications = cmAnalysesSpecifications$toList()", + ")", + "", + "analysisSpecifications <- Strategus::createEmptyAnalysisSpecifications()", + "analysisSpecifications <- Strategus::addSharedResources(analysisSpecifications, cohortDefinitionSharedResource)", + "analysisSpecifications <- Strategus::addModuleSpecifications(analysisSpecifications, characterizationModuleSpecifications)", + "analysisSpecifications <- Strategus::addModuleSpecifications(analysisSpecifications, cohortIncidenceModuleSpecifications)", + "analysisSpecifications <- Strategus::addModuleSpecifications(analysisSpecifications, cohortMethodModuleSpecifications)", + "analysis_spec_path <- file.path(analysis_settings_dir, 'analysisSpecification.json')", + "ParallelLogger::saveSettingsToJson(analysisSpecifications, analysis_spec_path)", + "", + "jsonlite::write_json(", + " list(", + " comparison_label = comparison$label %||% '',", + " target_id = target_id,", + " comparator_id = comparator_id,", + " outcome_ids = as.list(outcome_ids),", + " analysis_specification_path = analysis_spec_path,", + " modules = c('CharacterizationModule', 
'CohortIncidenceModule', 'CohortMethodModule'),", + " defaults_path = file.path(output_dir, 'cm_analysis_defaults.json'),", + " cm_analysis_json_path = file.path(analysis_settings_dir, 'cmAnalysis.json'),", + " concept_set_selections_path = file.path(output_dir, 'cm_concept_set_selections.json'),", + " negative_control_concept_set_id = negativeControlConceptSet$concept_set_id %||% NULL,", + " study_start_date = getDbDefaults$studyStartDate %||% '',", + " study_end_date = getDbDefaults$studyEndDate %||% '',", + " ps_adjustment_strategy = psAdjustmentStrategy,", + " ps_trimming_strategy = psTrimmingStrategy,", + " covariate_include_all_concepts = covariateConceptDefaults$include_all_concepts %||% covariateConceptSelections$include_all_concepts %||% TRUE,", + " covariate_include_concept_set_id = if (is.na(includedConceptSetId)) NULL else includedConceptSetId,", + " covariate_exclude_concept_set_id = if (is.na(excludedConceptSetId)) NULL else excludedConceptSetId,", + " analytic_settings_profile_name = defaults$profile_name %||% NULL,", + " analytic_settings_customized_sections = defaults$customized_sections %||% character(0),", + " TODO = 'Replace dummy concept set selections with actual concept definitions and concept IDs when ACP/MCP support is implemented.'", + " ),", + " file.path(output_dir, 'cm_analysis_state.json'),", + " pretty = TRUE,", + " auto_unbox = TRUE", + ")", + "", + "# Execute the just-created Strategus specification.", + "db_details_path <- file.path(base_dir, 'strategus-db-details.json')", + "execution_settings_path <- file.path(base_dir, 'strategus-execution-settings.json')", + "connectionDetails <- OHDSIAssistant::createStrategusConnectionDetails(path = db_details_path)", + "exec <- OHDSIAssistant::createStrategusExecutionSettings(path = execution_settings_path)", + "", + "result <- Strategus::execute(", + " connectionDetails = connectionDetails,", + " analysisSpecifications = analysisSpecifications,", + " executionSettings = 
exec$executionSettings", + ")", + "", + "result_path <- file.path(analysis_settings_dir, 'strategus_execute_result.rds')", + "saveRDS(result, result_path)", + "message('Strategus execution result saved to: ', result_path)", + "" + ) + write_lines(file.path(scripts_dir, "06_cm_spec.R"), script_06) + + if (interactive) { + cat("\n== Session Summary ==\n") + cat(sprintf("Study intent: %s\n", studyIntent)) + cat(sprintf("Comparison: %s\n", comparisonLabel)) + cat(sprintf("Target: %s (atlas %s -> cohort %s)\n", target_name, selected_target_id, new_target_id)) + cat(sprintf("Comparator: %s (atlas %s -> cohort %s)\n", comparator_name, selected_comparator_id, new_comparator_id)) + cat("Outcomes:\n") + for (i in seq_along(new_outcome_ids)) { + cat(sprintf(" - %s (atlas %s -> cohort %s)\n", outcome_names[[i]], selected_outcome_ids[[i]], new_outcome_ids[[i]])) + } + if (isTRUE(negative_control_enabled)) { + cat(sprintf("Negative control concept set: %s\n", negativeControlConceptSetId)) + } + if (isTRUE(covariate_enabled)) { + include_label <- if (is.null(includeCovariateConceptSetId)) "all concepts" else as.character(includeCovariateConceptSetId) + exclude_label <- if (is.null(excludeCovariateConceptSetId)) "none" else as.character(excludeCovariateConceptSetId) + cat(sprintf("Covariate concept sets: include=%s, exclude=%s\n", include_label, exclude_label)) + } + cat(sprintf("Cohort ID remap: %s\n", if (isTRUE(use_mapping)) sprintf("enabled (base %s)", cohortIdBase) else "disabled")) + cat(sprintf("Analytic settings mode: %s\n", analytic_settings_mode)) + cat(sprintf("Analytic settings profile: %s\n", effective_analytic_settings$profile_name)) + section_label <- if (length(effective_analytic_settings$customized_sections) == 0) { + "defaults only" + } else { + paste(as.character(effective_analytic_settings$customized_sections), collapse = ", ") + } + cat(sprintf("Customized analytic sections: %s\n", section_label)) + if (identical(analytic_settings_mode, "free_text")) { + 
cat(sprintf("Analytic settings description: %s\n", analytic_settings_description)) + cat(sprintf("Analytic settings recommendation source: %s\n", analytic_settings_recommendation_source)) + cat(sprintf("Analytic settings recommendation: %s (%s)\n", analytic_settings_recommendation_path, analytic_settings_recommendation_status)) + if (!is.na(analytic_settings_acp_response_path)) { + cat(sprintf("ACP specifications response: %s\n", analytic_settings_acp_response_path)) + } + } + cat("Generated scripts:\n") + cat(" - 02_apply_improvements.R\n") + cat(" - 03_generate_cohorts.R\n") + cat(" - 04_keeper_review.R\n") + cat(" - 05_diagnostics.R\n") + cat(" - 06_cm_spec.R\n") + cat("Status/TODO artifacts:\n") + cat(sprintf(" - %s\n", improvements_status_path)) + cat(sprintf(" - %s\n", cm_evaluation_todo_path)) + } + + invisible(list( + output_dir = output_dir, + scripts_dir = scripts_dir, + cohort_methods_intent_split = json_string_or_null(if (file.exists(cohort_methods_intent_split_path)) cohort_methods_intent_split_path else NULL), + manual_intent = manual_intent_path, + manual_inputs = manual_inputs_path, + cm_comparisons = cm_comparisons_path, + improvements_status = improvements_status_path, + improvements_target = improvements_target_path, + improvements_comparator = improvements_comparator_path, + improvements_outcome = improvements_outcome_path, + cm_concept_set_selections = cm_concept_set_selections_path, + cm_analysis_json = cm_analysis_json_path, + cohort_csv = cohort_csv, + state = state_path + )) +} diff --git a/R/OHDSIAssistant/R/strategus_incidence_shell.R b/R/OHDSIAssistant/R/strategus_incidence_shell.R index c0dd506..770e026 100644 --- a/R/OHDSIAssistant/R/strategus_incidence_shell.R +++ b/R/OHDSIAssistant/R/strategus_incidence_shell.R @@ -112,8 +112,29 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide path } + phenotype_definition_path <- function(phenotype_id, index_def_dir) { + file.path(index_def_dir, sprintf("%s.json", 
gsub(":", "__", phenotype_id, fixed = TRUE))) + } + + stop_if_unsupported_selected <- function(phenotype_ids, role_label) { + unsupported <- phenotype_ids[!grepl("^ohdsi:", phenotype_ids %||% character(0))] + if (length(unsupported) > 0) { + stop( + sprintf( + paste0( + "Selected %s phenotype(s) include non-OHDSI ids (%s). ", + "This demo workflow does not yet support converting non-OHDSI phenotype definitions ", + "into computable OHDSI cohort definitions. Please re-run and choose an OHDSI phenotype." + ), + role_label, + paste(unique(unsupported), collapse = ", ") + ) + ) + } + } + copy_cohort_json_multi <- function(source_id, dest_id, dest_dirs, index_def_dir) { - src <- file.path(index_def_dir, sprintf("%s.json", source_id)) + src <- phenotype_definition_path(source_id, index_def_dir) if (!file.exists(src)) stop(sprintf("Cohort JSON not found: %s", src)) dests <- character(0) for (dest_dir in dest_dirs) { @@ -343,7 +364,7 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide cat("\n== Target Phenotype Recommendations ==\n") for (i in seq_along(recommendations_target)) { rec <- recommendations_target[[i]] - cat(sprintf("%d. %s (ID %s)\n", i, rec$cohortName %||% "", rec$cohortId %||% "?")) + cat(sprintf("%d. %s (ID %s)\n", i, rec$phenotype_name %||% "", rec$phenotype_id %||% "?")) if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) } @@ -370,7 +391,7 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide cat("\n== Target Phenotype Recommendations (window 2) ==\n") for (i in seq_along(recommendations_target)) { rec <- recommendations_target[[i]] - cat(sprintf("%d. %s (ID %s)\n", i, rec$cohortName %||% "", rec$cohortId %||% "?")) + cat(sprintf("%d. 
%s (ID %s)\n", i, rec$phenotype_name %||% "", rec$phenotype_id %||% "?")) if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) } ok_any <- prompt_yesno("Are any of these acceptable?", default = TRUE) @@ -408,17 +429,17 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide if (interactive) { labels <- vapply(seq_along(recommendations_target), function(i) { rec <- recommendations_target[[i]] - sprintf("%s (ID %s)", rec$cohortName %||% "", rec$cohortId %||% "?") + sprintf("%s (ID %s)", rec$phenotype_name %||% "", rec$phenotype_id %||% "?") }, character(1)) picks <- utils::select.list(labels, multiple = FALSE, title = "Select target phenotype") if (nzchar(picks)) { idx <- which(labels == picks)[1] - selected_ids_target <- recommendations_target[[idx]]$cohortId + selected_ids_target <- recommendations_target[[idx]]$phenotype_id } } else { - selected_ids_target <- recommendations_target[[1]]$cohortId + selected_ids_target <- recommendations_target[[1]]$phenotype_id } - selected_ids_target <- as.integer(selected_ids_target) + selected_ids_target <- as.character(selected_ids_target) if (length(selected_ids_target) == 0) stop("No target cohort selected.") use_mapping <- FALSE @@ -444,6 +465,8 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide new } + stop_if_unsupported_selected(selected_ids_target, "target") + new_ids_target <- map_ids(selected_ids_target) copy_cohort_json_multi(selected_ids_target, new_ids_target, c(selected_target_dir, selected_dir), index_def_dir) @@ -586,7 +609,7 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide cat("\n== Outcome Phenotype Recommendations ==\n") for (i in seq_along(recommendations_outcome)) { rec <- recommendations_outcome[[i]] - cat(sprintf("%d. %s (ID %s)\n", i, rec$cohortName %||% "", rec$cohortId %||% "?")) + cat(sprintf("%d. 
%s (ID %s)\n", i, rec$phenotype_name %||% "", rec$phenotype_id %||% "?")) if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) } @@ -613,7 +636,7 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide cat("\n== Outcome Phenotype Recommendations (window 2) ==\n") for (i in seq_along(recommendations_outcome)) { rec <- recommendations_outcome[[i]] - cat(sprintf("%d. %s (ID %s)\n", i, rec$cohortName %||% "", rec$cohortId %||% "?")) + cat(sprintf("%d. %s (ID %s)\n", i, rec$phenotype_name %||% "", rec$phenotype_id %||% "?")) if (!is.null(rec$justification)) cat(sprintf(" %s\n", rec$justification)) } ok_any <- prompt_yesno("Are any of these acceptable?", default = TRUE) @@ -651,23 +674,25 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide if (interactive) { labels <- vapply(seq_along(recommendations_outcome), function(i) { rec <- recommendations_outcome[[i]] - sprintf("%s (ID %s)", rec$cohortName %||% "", rec$cohortId %||% "?") + sprintf("%s (ID %s)", rec$phenotype_name %||% "", rec$phenotype_id %||% "?") }, character(1)) picks <- utils::select.list(labels, multiple = TRUE, title = "Select outcome phenotypes") selected_ids_outcome <- vapply(picks, function(label) { idx <- which(labels == label)[1] - recommendations_outcome[[idx]]$cohortId - }, numeric(1)) + recommendations_outcome[[idx]]$phenotype_id %||% NA_character_ + }, character(1)) } else { if (length(recommendations_outcome) >= 2) { - selected_ids_outcome <- vapply(recommendations_outcome[-1], function(r) r$cohortId, numeric(1)) + selected_ids_outcome <- vapply(recommendations_outcome[-1], function(r) r$phenotype_id %||% NA_character_, character(1)) } else { - selected_ids_outcome <- vapply(recommendations_outcome, function(r) r$cohortId, numeric(1)) + selected_ids_outcome <- vapply(recommendations_outcome, function(r) r$phenotype_id %||% NA_character_, character(1)) } } - selected_ids_outcome <- as.integer(selected_ids_outcome) + 
selected_ids_outcome <- as.character(selected_ids_outcome) if (length(selected_ids_outcome) == 0) stop("No outcome cohorts selected.") + stop_if_unsupported_selected(selected_ids_outcome, "outcome") + new_ids_outcome <- map_ids(selected_ids_outcome) for (i in seq_along(new_ids_outcome)) { @@ -791,11 +816,11 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide for (i in seq_along(new_ids_target)) { cid <- selected_ids_target[[i]] new_id <- new_ids_target[[i]] - rec <- recommendations_target[[which(vapply(recommendations_target, function(r) r$cohortId == cid, logical(1)))]] + rec <- recommendations_target[[which(vapply(recommendations_target, function(r) r$phenotype_id == cid, logical(1)))]] cohort_rows[[length(cohort_rows) + 1]] <- data.frame( atlas_id = cid, cohort_id = new_id, - cohort_name = rec$cohortName %||% paste0("Cohort ", new_id), + cohort_name = rec$phenotype_name %||% paste0("Cohort ", new_id), logic_description = rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE @@ -806,11 +831,11 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide for (i in seq_along(new_ids_outcome)) { cid <- selected_ids_outcome[[i]] new_id <- new_ids_outcome[[i]] - rec <- recommendations_outcome[[which(vapply(recommendations_outcome, function(r) r$cohortId == cid, logical(1)))]] + rec <- recommendations_outcome[[which(vapply(recommendations_outcome, function(r) r$phenotype_id == cid, logical(1)))]] cohort_rows[[length(cohort_rows) + 1]] <- data.frame( atlas_id = cid, cohort_id = new_id, - cohort_name = rec$cohortName %||% paste0("Cohort ", new_id), + cohort_name = rec$phenotype_name %||% paste0("Cohort ", new_id), logic_description = rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE @@ -882,8 +907,15 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide script_01 <- c( script_header, "`%||%` <- function(x, y) if 
(is.null(x)) y else x", + "phenotype_definition_path <- function(phenotype_id, index_def_dir) {", + " file.path(index_def_dir, sprintf('%s.json', gsub(':', '__', phenotype_id, fixed = TRUE)))", + "}", + "stop_if_unsupported_selected <- function(phenotype_ids, role_label) {", + " unsupported <- phenotype_ids[!grepl('^ohdsi:', phenotype_ids %||% character(0))]", + " if (length(unsupported) > 0) stop(sprintf('Selected %s phenotype(s) include non-OHDSI ids (%s). This demo workflow does not yet support converting non-OHDSI phenotype definitions into computable OHDSI cohort definitions. Please re-run and choose an OHDSI phenotype.', role_label, paste(unique(unsupported), collapse = ', ')))", + "}", "copy_cohort_json <- function(source_id, dest_id, dest_dirs, index_def_dir) {", - " src <- file.path(index_def_dir, sprintf('%s.json', source_id))", + " src <- phenotype_definition_path(source_id, index_def_dir)", " if (!file.exists(src)) stop('Cohort JSON not found: ', src)", " for (dest_dir in dest_dirs) {", " dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE)", @@ -904,12 +936,12 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide "recs_outcome <- jsonlite::fromJSON(file.path(output_dir, 'recommendations_outcome.json'), simplifyVector = FALSE)", "items_target <- (recs_target$recommendations %||% recs_target)$phenotype_recommendations %||% list()", "items_outcome <- (recs_outcome$recommendations %||% recs_outcome)$phenotype_recommendations %||% list()", - "labels_target <- vapply(seq_along(items_target), function(i) sprintf('%s (ID %s)', items_target[[i]]$cohortName %||% '', items_target[[i]]$cohortId %||% '?'), character(1))", - "labels_outcome <- vapply(seq_along(items_outcome), function(i) sprintf('%s (ID %s)', items_outcome[[i]]$cohortName %||% '', items_outcome[[i]]$cohortId %||% '?'), character(1))", + "labels_target <- vapply(seq_along(items_target), function(i) sprintf('%s (ID %s)', items_target[[i]]$phenotype_name %||% '', 
items_target[[i]]$phenotype_id %||% '?'), character(1))", + "labels_outcome <- vapply(seq_along(items_outcome), function(i) sprintf('%s (ID %s)', items_outcome[[i]]$phenotype_name %||% '', items_outcome[[i]]$phenotype_id %||% '?'), character(1))", "target_pick <- utils::select.list(labels_target, multiple = FALSE, title = 'Select target phenotype')", - "target_ids <- if (nzchar(target_pick)) items_target[[which(labels_target == target_pick)[1]]]$cohortId else integer(0)", + "target_ids <- if (nzchar(target_pick)) (items_target[[which(labels_target == target_pick)[1]]]$phenotype_id %||% '') else character(0)", "outcome_picks <- utils::select.list(labels_outcome, multiple = TRUE, title = 'Select outcome phenotypes')", - "outcome_ids <- vapply(outcome_picks, function(label) items_outcome[[which(labels_outcome == label)[1]]]$cohortId, numeric(1))", + "outcome_ids <- vapply(outcome_picks, function(label) items_outcome[[which(labels_outcome == label)[1]]]$phenotype_id %||% NA_character_, character(1))", "if (length(target_ids) == 0) stop('No target cohort selected.')", "if (length(outcome_ids) == 0) stop('No outcome cohorts selected.')", "resp <- tolower(trimws(readline('Map cohort IDs to a new range (avoid collisions)? 
[Y/n]: ')))", @@ -928,7 +960,9 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide " next_id <<- max(new) + 1", " new", "}", + "stop_if_unsupported_selected(target_ids, 'target')", "new_ids_target <- map_ids(target_ids)", + "stop_if_unsupported_selected(outcome_ids, 'outcome')", "new_ids_outcome <- map_ids(outcome_ids)", "for (i in seq_along(target_ids)) copy_cohort_json(target_ids[[i]], new_ids_target[[i]], c(selected_target_dir, selected_dir), index_def_dir)", "for (i in seq_along(outcome_ids)) copy_cohort_json(outcome_ids[[i]], new_ids_outcome[[i]], c(selected_outcome_dir, selected_dir), index_def_dir)", @@ -944,14 +978,14 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide "for (i in seq_along(new_ids_target)) {", " cid <- target_ids[[i]]", " new_id <- new_ids_target[[i]]", - " rec <- items_target[[which(vapply(items_target, function(r) r$cohortId == cid, logical(1)))[1]]]", - " cohort_rows[[length(cohort_rows) + 1]] <- data.frame(atlas_id = cid, cohort_id = new_id, cohort_name = rec$cohortName %||% paste0('Cohort ', new_id), logic_description = rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE)", + " rec <- items_target[[which(vapply(items_target, function(r) r$phenotype_id == cid, logical(1)))[1]]]", + " cohort_rows[[length(cohort_rows) + 1]] <- data.frame(atlas_id = cid, cohort_id = new_id, cohort_name = rec$phenotype_name %||% paste0('Cohort ', new_id), logic_description = rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE)", "}", "for (i in seq_along(new_ids_outcome)) {", " cid <- outcome_ids[[i]]", " new_id <- new_ids_outcome[[i]]", - " rec <- items_outcome[[which(vapply(items_outcome, function(r) r$cohortId == cid, logical(1)))[1]]]", - " cohort_rows[[length(cohort_rows) + 1]] <- data.frame(atlas_id = cid, cohort_id = new_id, cohort_name = rec$cohortName %||% paste0('Cohort ', new_id), logic_description = 
rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE)", + " rec <- items_outcome[[which(vapply(items_outcome, function(r) r$phenotype_id == cid, logical(1)))[1]]]", + " cohort_rows[[length(cohort_rows) + 1]] <- data.frame(atlas_id = cid, cohort_id = new_id, cohort_name = rec$phenotype_name %||% paste0('Cohort ', new_id), logic_description = rec$justification %||% NA_character_, generate_stats = TRUE, stringsAsFactors = FALSE)", "}", "cohort_df <- do.call(rbind, cohort_rows)", "write.csv(cohort_df, file.path(selected_dir, 'Cohorts.csv'), row.names = FALSE)", @@ -1387,8 +1421,8 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide ")", "", "roles <- jsonlite::fromJSON(file.path(output_dir, 'cohort_roles.json'), simplifyVector = TRUE)", - "target_ids <- as.integer(roles$targets %||% integer(0))", - "outcome_ids <- as.integer(roles$outcomes %||% integer(0))", + "target_ids <- as.character(roles$targets %||% character(0))", + "outcome_ids <- as.character(roles$outcomes %||% character(0))", "if (length(target_ids) == 0) stop('No target cohorts defined in cohort_roles.json')", "if (length(outcome_ids) == 0) stop('No outcome cohorts defined in cohort_roles.json')", "cgModule <- CohortGeneratorModule$new()", @@ -1451,13 +1485,13 @@ runStrategusIncidenceShell <- function(outputDir = "demo-strategus-cohort-incide cat(sprintf(" %s\n", outcome_statement)) cat("Target cohorts:\n") for (i in seq_along(new_ids_target)) { - rec <- recommendations_target[[which(vapply(recommendations_target, function(r) r$cohortId == selected_ids_target[[i]], logical(1)))]] - cat(sprintf(" - %s (atlas %s -> cohort %s)\n", rec$cohortName %||% "", selected_ids_target[[i]], new_ids_target[[i]])) + rec <- recommendations_target[[which(vapply(recommendations_target, function(r) r$phenotype_id == selected_ids_target[[i]], logical(1)))]] + cat(sprintf(" - %s (atlas %s -> cohort %s)\n", rec$phenotype_name %||% "", selected_ids_target[[i]], 
new_ids_target[[i]])) } cat("Outcome cohorts:\n") for (i in seq_along(new_ids_outcome)) { - rec <- recommendations_outcome[[which(vapply(recommendations_outcome, function(r) r$cohortId == selected_ids_outcome[[i]], logical(1)))]] - cat(sprintf(" - %s (atlas %s -> cohort %s)\n", rec$cohortName %||% "", selected_ids_outcome[[i]], new_ids_outcome[[i]])) + rec <- recommendations_outcome[[which(vapply(recommendations_outcome, function(r) r$phenotype_id == selected_ids_outcome[[i]], logical(1)))]] + cat(sprintf(" - %s (atlas %s -> cohort %s)\n", rec$phenotype_name %||% "", selected_ids_outcome[[i]], new_ids_outcome[[i]])) } cat("JSON outputs:\n") cat(sprintf(" - Selected target cohorts: %s\n", selected_target_dir)) diff --git a/R/OHDSIAssistant/README.md b/R/OHDSIAssistant/README.md index b39b66f..9463591 100644 --- a/R/OHDSIAssistant/README.md +++ b/R/OHDSIAssistant/README.md @@ -63,3 +63,117 @@ It generates scripts under `demo-strategus-cohort-incidence/scripts/` following 4. `04_keeper_review.R` 5. `05_diagnostics.R` 6. `06_incidence_spec.R` + +## Suggest Cohort Method Specifications + +Use `suggestCohortMethodSpecs()` when you want ACP to turn a free-text analytic-settings description into a CohortMethod recommendation without running the full shell. + +```r +OHDSIAssistant::acp_connect("http://127.0.0.1:8765") + +res <- OHDSIAssistant::suggestCohortMethodSpecs( + studyIntent = "What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?", + analyticSettingsDescription = "Use one-to-one propensity score matching, a 365-day washout, and a Cox outcome model.", + interactive = TRUE +) +``` + +The helper calls ACP `/flows/cohort_methods_specifications_recommendation`. When ACP is not connected, it returns a local stub with the same broad response shape. 
+ +## Strategus Cohort Methods Shell + +Use `runStrategusCohortMethodsShell()` when you want the full cohort-methods workflow: intent split, target/comparator/outcome recommendation or explicit cohort IDs, analytic-settings collection, output artifacts, generated R scripts, and a merged `06_cm_spec.R` that builds and executes the Strategus specification. + +Fully interactive run: + +```r +OHDSIAssistant::acp_connect("http://127.0.0.1:8765") + +OHDSIAssistant::runStrategusCohortMethodsShell() +``` + +Provide only the study intent and let the shell recommend/select target, comparator, and outcome cohorts: + +```r +OHDSIAssistant::runStrategusCohortMethodsShell( + studyIntent = "What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?" +) +``` + +Provide explicit cohort IDs when you already know the target, comparator, and outcome cohorts: + +```r +OHDSIAssistant::acp_connect("http://127.0.0.1:8765") + +OHDSIAssistant::runStrategusCohortMethodsShell( + outputDir = "demo-strategus-cohort-methods", + studyIntent = "What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?", + targetCohortId = 12345, + comparatorCohortId = 23456, + outcomeCohortIds = c(34567, 45678), + comparisonLabel = "ace_inhibitors_vs_thiazide_diuretics" +) +``` + +To exercise the analytic-settings flow with stable demo inputs, pass explicit target/comparator/outcome IDs and either choose `step_by_step` when prompted or provide a free-text description: + +```r +OHDSIAssistant::runStrategusCohortMethodsShell( + outputDir = "demo-strategus-cohort-methods-analytic-settings", + studyIntent = "What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?", + targetCohortId = 12345, + comparatorCohortId = 23456, + 
outcomeCohortIds = c(34567), + comparisonLabel = "ace_inhibitors_vs_thiazide_diuretics", + analyticSettingsDescription = "Use one-to-one propensity score matching and a Cox outcome model." +) +``` + +The shell writes outputs under `outputDir`, including `outputs/cm_analysis_defaults.json`, `outputs/cm_acp_specifications_recommendation.json` for free-text mode, `analysis-settings/cmAnalysis.json`, `analysis-settings/analysisSpecification.json`, and scripts under `scripts/`. + +Generated scripts are: + +1. `02_apply_improvements.R` +2. `03_generate_cohorts.R` +3. `04_keeper_review.R` +4. `05_diagnostics.R` +5. `06_cm_spec.R` + +Before running scripts that connect to the database, place these two files at the root of +`outputDir`: + +```text +<outputDir>/strategus-db-details.json +<outputDir>/strategus-execution-settings.json +``` + +`strategus-db-details.json`: + +```json +{ + "dbms": "postgresql", + "DB_SERVER": "localhost/database_name", + "DB_PORT": "5432", + "DB_USER": "ohdsi", + "DB_PASS": "change_me", + "DB_DRIVER_PATH": "~/jdbcDrivers", + "extraSettings": "sslmode=disable" +} +``` + +`strategus-execution-settings.json`: + +```json +{ + "cdmDatabaseSchema": "cdm_schema", + "workDatabaseSchema": "work_schema", + "resultsDatabaseSchema": "results_schema", + "vocabularyDatabaseSchema": "vocab_schema", + "cohortTable": "cohort", + "workFolder": "demo-strategus-cohort-methods/work", + "resultsFolder": "demo-strategus-cohort-methods/results", + "cohortIdFieldName": "cohort_definition_id", + "maxCores": 1 +} +``` + diff --git a/README.md b/README.md index 17a3495..056b9dd 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ Related implemented flows: - `phenotype_recommendation_advice` - `phenotype_improvements` - `phenotype_intent_split` +- `cohort_methods_intent_split` - `concept_sets_review` - `cohort_critique_general_design` @@ -174,6 +175,14 @@ This has been tested with [Open webui](https://docs.openwebui.com/), with locall If you want phenotype retrieval, you also need an indexed
phenotype library. See [docs/PHENOTYPE_INDEXING.md](/ai-agent/HadesProject/OHDSI-Study-Agent/docs/PHENOTYPE_INDEXING.md). +Current indexing workflow: + +1. Build `catalog.jsonl` plus `sparse_index.pkl` from OHDSI and/or CIPHER source files. +2. Optionally enable LLM-derived retrieval keywords during that build. +3. Build `dense.index` separately when embedding infrastructure is available, either during the main build with `--build-dense` or later with `--build-dense --dense-only`. + +The retrieval layer reads from `PHENOTYPE_INDEX_DIR`, which should point to the built output directory. The source phenotype files do not need to live under that directory. + ## Minimal Examples diff --git a/acp_agent/study_agent_acp/agent.py b/acp_agent/study_agent_acp/agent.py index 06db6b7..0f21611 100644 --- a/acp_agent/study_agent_acp/agent.py +++ b/acp_agent/study_agent_acp/agent.py @@ -1,9 +1,15 @@ +import json import logging import os +import re import time +from copy import deepcopy from typing import Any, Dict, List, Optional, Protocol +from .phenotype_recommendation_utils import PhenotypeRecommendationMixin + from study_agent_core.models import ( + CohortMethodsIntentSplitInput, CohortLintInput, ConceptSetDiffInput, KeeperConceptSetsGenerateInput, @@ -11,19 +17,24 @@ PhenotypeIntentSplitInput, PhenotypeImprovementsInput, PhenotypeRecommendationAdviceInput, + PhenotypeRecommendationPlanInput, PhenotypeRecommendationsInput, ) from study_agent_core.tools import ( + cohort_methods_intent_split, cohort_lint, phenotype_intent_split, phenotype_improvements, phenotype_recommendation_advice, + phenotype_recommendation_plan, phenotype_recommendations, propose_concept_set_diff, ) from .llm_client import ( LLMCallResult, + build_cohort_methods_intent_split_prompt, build_intent_split_prompt, + build_recommendation_intent_facets_prompt, build_advice_prompt, build_keeper_concept_set_prompt, build_improvements_prompt, @@ -37,6 +48,8 @@ logger = logging.getLogger("study_agent.acp.agent") 
+_TOPIC_TOKEN_RE = re.compile(r"[a-z0-9]+") + class MCPClient(Protocol): def list_tools(self) -> List[Dict[str, Any]]: @@ -46,6 +59,211 @@ def call_tool(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: ... +class StudyAgent(PhenotypeRecommendationMixin): + def __init__( + self, + mcp_client: Optional[MCPClient] = None, + allow_core_fallback: bool = True, + confirmation_required_tools: Optional[List[str]] = None, + ) -> None: + self._mcp_client = mcp_client + self._allow_core_fallback = allow_core_fallback + self._confirmation_required = set(confirmation_required_tools or []) + + self._core_tools = { + "propose_concept_set_diff": propose_concept_set_diff, + "cohort_lint": cohort_lint, + "phenotype_recommendation_plan": phenotype_recommendation_plan, + "phenotype_recommendations": phenotype_recommendations, + "phenotype_recommendation_advice": phenotype_recommendation_advice, + "phenotype_improvements": phenotype_improvements, + "phenotype_intent_split": phenotype_intent_split, + "cohort_methods_intent_split": cohort_methods_intent_split, + } + + self._schemas = { + "propose_concept_set_diff": ConceptSetDiffInput.model_json_schema(), + "cohort_lint": CohortLintInput.model_json_schema(), + "phenotype_recommendation_plan": PhenotypeRecommendationPlanInput.model_json_schema(), + "phenotype_recommendations": PhenotypeRecommendationsInput.model_json_schema(), + "phenotype_recommendation_advice": PhenotypeRecommendationAdviceInput.model_json_schema(), + "phenotype_improvements": PhenotypeImprovementsInput.model_json_schema(), + "phenotype_intent_split": PhenotypeIntentSplitInput.model_json_schema(), + "cohort_methods_intent_split": CohortMethodsIntentSplitInput.model_json_schema(), + "keeper_concept_sets_generate": KeeperConceptSetsGenerateInput.model_json_schema(), + "keeper_profiles_generate": KeeperProfilesGenerateInput.model_json_schema(), + } + + def _debug_enabled(self) -> bool: + return os.getenv("STUDY_AGENT_DEBUG", "0") == "1" + + def 
_log_debug(self, message: str) -> None: + if self._debug_enabled(): + logger.debug(message) + + def _llm_diagnostics(self, result: Optional[LLMCallResult]) -> Dict[str, Any]: + if result is None: + return { + "llm_status": "disabled", + "llm_duration_seconds": 0.0, + "llm_error": "llm_result_missing", + "llm_parse_stage": None, + "llm_schema_valid": False, + } + diagnostics = { + "llm_status": result.status, + "llm_duration_seconds": result.duration_seconds, + "llm_error": result.error, + "llm_parse_stage": result.parse_stage, + "llm_schema_valid": bool(result.schema_valid) if result.schema_valid is not None else result.status == "ok", + "llm_request_mode": result.request_mode, + } + if result.missing_keys: + diagnostics["llm_missing_keys"] = result.missing_keys + if os.getenv("LLM_LOG_RESPONSE", "0") == "1": + diagnostics["llm_raw_response"] = result.raw_response + diagnostics["llm_content_text"] = result.content_text + return diagnostics + + def _timed_tool_call(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: + started = time.perf_counter() + result = self.call_tool(name=name, arguments=arguments) + duration = time.perf_counter() - started + full_result = result.get("full_result") or {} + count = full_result.get("count") + if count is None and isinstance(full_result.get("concepts"), list): + count = len(full_result.get("concepts") or []) + logger.debug( + "keeper tool_call name=%s seconds=%.2f status=%s result_error=%s count=%s", + name, + duration, + result.get("status"), + full_result.get("error"), + count, + ) + return result + + def _fallback_reason_for_llm(self, result: Optional[LLMCallResult]) -> str: + if result is None: + return "llm_empty_result" + mapping = { + "timeout": "llm_timeout", + "http_error": "llm_http_error", + "transport_error": "llm_transport_error", + "json_parse_failed": "llm_json_parse_failed", + "schema_mismatch": "llm_schema_mismatch", + "disabled": "llm_disabled", + } + return mapping.get(result.status, 
"llm_empty_result") + + def _dedupe_concepts(self, concepts: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + deduped: List[Dict[str, Any]] = [] + seen: set[Any] = set() + for concept in concepts or []: + concept_id = concept.get("conceptId") + if concept_id in (None, ""): + continue + if concept_id in seen: + continue + seen.add(concept_id) + deduped.append(concept) + return deduped + + def _extract_keeper_concept_ids(self, result: Optional[LLMCallResult]) -> tuple[list[int], Optional[str]]: + if result is None: + return [], None + parsed_any = result.parsed_content + if isinstance(parsed_any, list): + extracted = [] + for concept in parsed_any: + if not isinstance(concept, dict): + continue + value = concept.get("conceptId", concept.get("concept_id")) + try: + extracted.append(int(value)) + except (TypeError, ValueError): + continue + if extracted: + return extracted, "top_level_array" + return [], None + if not isinstance(parsed_any, dict): + return [], None + parsed = parsed_any + ids = parsed.get("conceptId") + if ids not in (None, "") and not isinstance(ids, list): + try: + return [int(ids)], "scalar_conceptId" + except (TypeError, ValueError): + return [], None + if isinstance(ids, list): + extracted: list[int] = [] + for value in ids: + try: + extracted.append(int(value)) + except (TypeError, ValueError): + continue + return extracted, None + + concepts = parsed.get("concepts") + if isinstance(concepts, list): + extracted = [] + for concept in concepts: + if not isinstance(concept, dict): + continue + value = concept.get("conceptId", concept.get("concept_id")) + try: + extracted.append(int(value)) + except (TypeError, ValueError): + continue + if extracted: + return extracted, "concepts_array" + return [], None + + def _call_llm(self, prompt: str, required_keys: Optional[List[str]] = None) -> LLMCallResult: + try: + return coerce_llm_call_result(call_llm(prompt, required_keys=required_keys)) + except TypeError: + return 
coerce_llm_call_result(call_llm(prompt)) + + def _hydrate_phenotype_summaries( + self, + phenotype_ids: List[str], + thin_candidates: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + thin_by_id = {row.get("phenotype_id"): row for row in thin_candidates if row.get("phenotype_id")} + hydrated: List[Dict[str, Any]] = [] + for phenotype_id in phenotype_ids: + thin = dict(thin_by_id.get(phenotype_id) or {}) + summary_result = self.call_tool( + name="phenotype_fetch_summary", + arguments={"phenotype_id": phenotype_id}, + ) + full = summary_result.get("full_result") or {} + summary_payload: Dict[str, Any] = {} + if isinstance(full.get("summary"), dict): + summary_payload = dict(full.get("summary") or {}) + elif isinstance(full.get("content"), dict): + summary_payload = dict(full.get("content") or {}) + elif isinstance(full, dict) and full.get("phenotype_id") == phenotype_id: + summary_payload = dict(full) + if summary_result.get("status") == "ok" and not full.get("error") and summary_payload: + row = dict(thin) + row.update(summary_payload) + if not row.get("name"): + row["name"] = row.get("phenotype_name") or "" + hydrated.append(row) + continue + if thin: + hydrated.append(thin) + return hydrated + + def list_tools(self) -> List[Dict[str, Any]]: + ... + + def call_tool(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: + ... 
+ + class StudyAgent: def __init__( self, @@ -60,6 +278,7 @@ def __init__( self._core_tools = { "propose_concept_set_diff": propose_concept_set_diff, "cohort_lint": cohort_lint, + "phenotype_recommendation_plan": phenotype_recommendation_plan, "phenotype_recommendations": phenotype_recommendations, "phenotype_recommendation_advice": phenotype_recommendation_advice, "phenotype_improvements": phenotype_improvements, @@ -69,6 +288,7 @@ def __init__( self._schemas = { "propose_concept_set_diff": ConceptSetDiffInput.model_json_schema(), "cohort_lint": CohortLintInput.model_json_schema(), + "phenotype_recommendation_plan": PhenotypeRecommendationPlanInput.model_json_schema(), "phenotype_recommendations": PhenotypeRecommendationsInput.model_json_schema(), "phenotype_recommendation_advice": PhenotypeRecommendationAdviceInput.model_json_schema(), "phenotype_improvements": PhenotypeImprovementsInput.model_json_schema(), @@ -208,6 +428,948 @@ def _call_llm(self, prompt: str, required_keys: Optional[List[str]] = None) -> L except TypeError: return coerce_llm_call_result(call_llm(prompt)) + def _hydrate_phenotype_summaries( + self, + phenotype_ids: List[str], + thin_candidates: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + thin_by_id = {row.get("phenotype_id"): row for row in thin_candidates if row.get("phenotype_id")} + hydrated: List[Dict[str, Any]] = [] + for phenotype_id in phenotype_ids: + thin = dict(thin_by_id.get(phenotype_id) or {}) + summary_result = self.call_tool( + name="phenotype_fetch_summary", + arguments={"phenotype_id": phenotype_id}, + ) + full = summary_result.get("full_result") or {} + summary_payload: Dict[str, Any] = {} + if isinstance(full.get("summary"), dict): + summary_payload = dict(full.get("summary") or {}) + elif isinstance(full.get("content"), dict): + summary_payload = dict(full.get("content") or {}) + elif isinstance(full, dict) and full.get("phenotype_id") == phenotype_id: + summary_payload = dict(full) + if summary_result.get("status") 
== "ok" and not full.get("error") and summary_payload: + row = dict(thin) + row.update(summary_payload) + if not row.get("name"): + row["name"] = row.get("phenotype_name") or "" + hydrated.append(row) + continue + if thin: + hydrated.append(thin) + return hydrated + + def _compact_text_value(self, value: Any, limit: int = 180) -> str: + if value in (None, ""): + return "" + if isinstance(value, list): + text = ", ".join(str(item) for item in value if item not in (None, "")) + elif isinstance(value, dict): + try: + text = json.dumps(value, ensure_ascii=True, sort_keys=True) + except TypeError: + text = str(value) + else: + text = str(value) + if len(text) > limit: + return text[:limit] + f"... [truncated {len(text) - limit} chars]" + return text + + def _build_compact_planning_candidates(self, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + compact_rows: List[Dict[str, Any]] = [] + for row in candidates: + if not isinstance(row, dict): + continue + compact_rows.append( + { + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset") or "", + "name": row.get("name") or row.get("phenotype_name") or "", + "short_description": self._compact_text_value(row.get("short_description"), limit=180), + "primary_clinical_topic": self._compact_text_value(row.get("primary_clinical_topic"), limit=120), + "phenotype_role": self._compact_text_value(row.get("phenotype_role"), limit=48), + "care_setting_scope": self._compact_text_value(row.get("care_setting_scope"), limit=64), + "population_scope": self._compact_text_value(row.get("population_scope"), limit=120), + "target_vs_context_conditions": self._compact_text_value(row.get("target_vs_context_conditions"), limit=220), + "exclude_from_primary_topic_match": self._compact_text_value(row.get("exclude_from_primary_topic_match"), limit=180), + "recommendation_summary": self._compact_text_value(row.get("recommendation_summary"), limit=220), + "retrieval_keywords": (row.get("retrieval_keywords") or 
[])[:6], + "executable_definition_status": row.get("executable_definition_status") or "", + "execution_readiness_score": row.get("execution_readiness_score"), + "score": row.get("score"), + "score_dense": row.get("score_dense"), + "score_sparse": row.get("score_sparse"), + } + ) + return compact_rows + + def _topic_tokens(self, value: Any) -> set[str]: + if value in (None, ""): + return set() + if isinstance(value, dict): + text = " ".join(str(part) for part in value.values() if part not in (None, "")) + elif isinstance(value, list): + text = " ".join(str(part) for part in value if part not in (None, "")) + else: + text = str(value) + return {token for token in _TOPIC_TOKEN_RE.findall(text.lower()) if len(token) > 1} + + def _flatten_text(self, value: Any) -> str: + if value in (None, ""): + return "" + if isinstance(value, dict): + return " ".join(self._flatten_text(part) for part in value.values()) + if isinstance(value, list): + return " ".join(self._flatten_text(part) for part in value) + return str(value).strip().lower() + + def _topic_overlap_score(self, query_tokens: set[str], candidate_tokens: set[str]) -> float: + if not query_tokens or not candidate_tokens: + return 0.0 + overlap = query_tokens & candidate_tokens + if not overlap: + return 0.0 + coverage = len(overlap) / max(1, len(query_tokens)) + precision = len(overlap) / max(1, len(candidate_tokens)) + return (coverage * 2.0) + precision + + def _normalize_clinical_topic_aliases(self, study_intent: str, aliases: Any) -> List[str]: + if not isinstance(aliases, list): + return [] + original_text = self._flatten_text(study_intent) + original_tokens = self._topic_tokens(study_intent) + normalized: List[str] = [] + seen: set[str] = set() + for value in aliases: + alias = self._flatten_text(value) + if not alias or alias in seen or alias == original_text: + continue + alias_tokens = self._topic_tokens(alias) + if len(alias_tokens) < 1 or len(alias_tokens) > 8: + continue + if alias in {"disease", 
"condition", "diagnosis", "bleeding", "infection", "disorder", "event"}: + continue + if len(alias) > 80: + continue + if original_tokens and alias_tokens and alias_tokens == original_tokens: + continue + normalized.append(alias) + seen.add(alias) + if len(normalized) >= 5: + break + return normalized + + def _best_alias_overlap( + self, + alias_tokens_list: List[tuple[str, set[str]]], + candidate_tokens: set[str], + ) -> tuple[float, str]: + best_score = 0.0 + best_alias = "" + for alias, alias_tokens in alias_tokens_list: + score = self._topic_overlap_score(alias_tokens, candidate_tokens) + if score > best_score: + best_score = score + best_alias = alias + return best_score, best_alias + + def _effective_intent_facets(self, study_intent: str, intent_facets: Dict[str, Any]) -> Dict[str, Any]: + effective = dict(intent_facets or {}) + text = self._flatten_text(study_intent) + role_cues_list = [self._flatten_text(item) for item in (effective.get("role_cues") or []) if item not in (None, "")] + care_setting_cues_list = [self._flatten_text(item) for item in (effective.get("care_setting_cues") or []) if item not in (None, "")] + population_cues_list = [self._flatten_text(item) for item in (effective.get("population_cues") or []) if item not in (None, "")] + + phenotype_role = self._flatten_text(effective.get("phenotype_role")) + if phenotype_role in {"", "unknown"}: + if any(cue in {"medication", "drug", "medication_based", "drug_based"} for cue in role_cues_list): + effective["phenotype_role"] = "medication_based" + elif any(cue == "procedure" for cue in role_cues_list): + effective["phenotype_role"] = "procedure" + elif any(cue == "diagnosis" for cue in role_cues_list): + effective["phenotype_role"] = "diagnosis" + + care_setting = self._flatten_text(effective.get("care_setting")) + if care_setting in {"", "unknown", "any"}: + if any(cue == "outpatient" for cue in care_setting_cues_list): + effective["care_setting"] = "outpatient" + elif any(cue == "inpatient" for 
cue in care_setting_cues_list): + effective["care_setting"] = "inpatient" + elif any(cue in {"ed", "emergency"} for cue in care_setting_cues_list): + effective["care_setting"] = "ed" + + if any(phrase in text for phrase in ("medication-based", "drug-based", "based on medication", "based on medications", "based on a medication", "based on drug", "based on drugs")): + effective["phenotype_role"] = "medication_based" + if any(phrase in text for phrase in ("outpatient", "ambulatory", "clinic", "office visit")): + effective["care_setting"] = "outpatient" + elif any(phrase in text for phrase in ("inpatient", "hospitalized", "hospitalisation", "hospitalization", "admission", "hospital stay")): + effective["care_setting"] = "inpatient" + elif any(phrase in text for phrase in ("emergency department", "urgent care")): + effective["care_setting"] = "ed" + + population_cue = self._flatten_text(effective.get("population_cue")) + if any(cue == "veterans" or cue == "veteran" for cue in population_cues_list) and "veteran" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; veterans" if effective.get("population_cue") else "veterans") + if any(cue == "va" for cue in population_cues_list) and "va" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; va" if effective.get("population_cue") else "va") + if any(token in text for token in ("veteran", "veterans")) and "veteran" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; veterans" if effective.get("population_cue") else "veterans") + if " va " in f" {text} " and "va" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; va" if effective.get("population_cue") else "va") + if any(token in self._flatten_text(effective.get("population_cue")) for token in ("veteran", "va")): + 
effective["geography_coding_preference"] = effective.get("geography_coding_preference") or "va" + + raw_aliases = ( + effective.get("clinical_topic_aliases") + or effective.get("condition_aliases") + or effective.get("topic_aliases") + or [] + ) + effective["clinical_topic_aliases"] = self._normalize_clinical_topic_aliases( + study_intent=study_intent, + aliases=raw_aliases, + ) + + return effective + + def _is_explicit_procedure_intent(self, study_intent: str, intent_facets: Dict[str, Any]) -> bool: + text = self._flatten_text(study_intent) + inferred_role = self._flatten_text(intent_facets.get("phenotype_role")) + if inferred_role == "procedure": + return True + return any(token in text for token in ("repair", "surgery", "surgical", "procedure", "bypass", "post op", "post-op", "postoperative")) + + def _is_explicit_hospitalization_intent(self, study_intent: str, intent_facets: Dict[str, Any]) -> bool: + text = self._flatten_text(study_intent) + care_setting = self._flatten_text(intent_facets.get("care_setting")) + if care_setting == "inpatient": + return True + return any(token in text for token in ("hospitalized", "hospitalisation", "hospitalization", "rehospitalization", "rehospitalisation", "inpatient", "admission", "hospital stay")) + + def _shortlist_target_count(self, max_results: int, max_shortlist: int) -> int: + return max(1, min(max_shortlist, max(max_results, 3))) + + def _shortlist_candidate_block_reason( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + ) -> Optional[str]: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + topic_text = self._flatten_text(row.get("primary_clinical_topic")) + role_text = self._flatten_text(row.get("phenotype_role")) + signals_text = self._flatten_text(row.get("signals")) + combined = " ".join(part for part in (name_text, topic_text, role_text, signals_text) if part) + + if 
"withdrawn" in combined or "[w]" in name_text: + return "withdrawn" + + if intent_role == "diagnosis": + if (not self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets)) and any( + token in combined for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative") + ): + return "procedure_for_diagnosis_intent" + if (not self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets)) and any( + token in combined for token in ("exacerbation", "hospitalization", "hospitalisation", "rehospitalization", "rehospitalisation") + ): + return "narrow_hospitalization_subtype_for_plain_diagnosis" + + return None + + def _candidate_topic_signature(self, row: Dict[str, Any]) -> str: + topic_text = self._flatten_text(row.get("primary_clinical_topic")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + if topic_text and name_text: + return f"{topic_text}||{name_text}" + if topic_text: + return topic_text + return name_text + + def _is_diagnosis_class_candidate(self, row: Dict[str, Any]) -> bool: + role = self._flatten_text(row.get("phenotype_role")) + if "diagnos" in role or role in {"condition", "case"}: + return True + if any(token in role for token in ("outcome", "complication", "severity", "screen", "risk_score", "visit")): + return False + if any(token in role for token in ("covariate", "comorbid")): + return True + return False + + def _allow_plain_diagnosis_fill( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + current_count: int, + ) -> bool: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if intent_role != "diagnosis": + return True + if self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets): + return True + if self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets): + return True + if current_count < 2: + 
return True + return self._is_diagnosis_class_candidate(row) + + def _candidate_has_defensible_topic_match(self, row: Dict[str, Any], intent_facets: Dict[str, Any], study_intent: str) -> bool: + priority = self._candidate_metadata_priority( + row=row, + intent_facets=intent_facets, + search_rank=0, + study_intent=study_intent, + ) + kinds = {reason.get("kind") for reason in (priority.get("reasons") or []) if isinstance(reason, dict)} + has_primary = "topic_primary" in kinds or "dynamic_clinical_alias_match" in kinds + has_context_only = "context_without_primary" in kinds and not has_primary + has_mismatch_only = "topic_mismatch" in kinds and not has_primary + return not (has_context_only or has_mismatch_only) + + def _allow_quality_threshold_fill( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + current_count: int, + ) -> bool: + if current_count < 1: + return True + if self._candidate_has_defensible_topic_match(row=row, intent_facets=intent_facets, study_intent=study_intent): + return True + return False + + def _should_dedupe_shortlist(self, intent_facets: Dict[str, Any], study_intent: str) -> bool: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if intent_role != "diagnosis": + return False + return not self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets) + + def _dedupe_shortlist_ids( + self, + shortlist_ids: List[str], + candidate_rows_by_id: Dict[str, Dict[str, Any]], + backfill_ids: List[str], + target_count: int, + ) -> tuple[List[str], Dict[str, Any]]: + deduped: List[str] = [] + seen_ids: set[str] = set() + seen_signatures: set[str] = set() + duplicate_topic_ids: List[str] = [] + + for phenotype_id in shortlist_ids or []: + phenotype_id = str(phenotype_id) + if phenotype_id in seen_ids: + continue + row = candidate_rows_by_id.get(phenotype_id) or {} + signature = self._candidate_topic_signature(row) + if signature and signature in seen_signatures: + 
duplicate_topic_ids.append(phenotype_id) + continue + deduped.append(phenotype_id) + seen_ids.add(phenotype_id) + if signature: + seen_signatures.add(signature) + + backfilled_ids: List[str] = [] + if duplicate_topic_ids and len(deduped) < target_count: + for phenotype_id in backfill_ids: + phenotype_id = str(phenotype_id) + if phenotype_id in seen_ids: + continue + row = candidate_rows_by_id.get(phenotype_id) or {} + signature = self._candidate_topic_signature(row) + if signature and signature in seen_signatures: + continue + deduped.append(phenotype_id) + seen_ids.add(phenotype_id) + backfilled_ids.append(phenotype_id) + if signature: + seen_signatures.add(signature) + if len(deduped) >= target_count: + break + + diagnostics = { + "duplicate_topic_ids": duplicate_topic_ids, + "backfilled_ids": backfilled_ids, + "applied": bool(duplicate_topic_ids), + } + return deduped, diagnostics + + def _build_shortlist_reasoning_notes( + self, + shortlist_rows: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + shortlist_enforcement: Optional[Dict[str, Any]] = None, + ) -> List[str]: + notes: List[str] = [] + topic = self._compact_text_value(intent_facets.get("condition_or_topic"), limit=80) or "the requested clinical topic" + role = self._flatten_text(intent_facets.get("phenotype_role")).replace("_", " ") or "phenotype" + notes.append(f"Selected shortlisted candidates align with {topic} as a {role}-oriented study intent.") + + for row in shortlist_rows[:3]: + if not isinstance(row, dict): + continue + name = row.get("name") or row.get("phenotype_name") or str(row.get("phenotype_id") or "candidate") + candidate_role = self._flatten_text(row.get("phenotype_role")).replace("_", " ") or "phenotype" + candidate_topic = self._compact_text_value(row.get("primary_clinical_topic"), limit=80) or name + notes.append(f"Included {name} as a {candidate_role} candidate focused on {candidate_topic}.") + + enforcement = shortlist_enforcement or {} + replaced_ids = [str(pid) for pid in 
(enforcement.get("replaced_ids") or []) if pid not in (None, "")] + duplicate_topic_ids = [str(pid) for pid in (enforcement.get("duplicate_topic_ids") or []) if pid not in (None, "")] + if replaced_ids: + notes.append( + "Shortlist replaced lower-quality candidates after rerank enforcement: " + + ", ".join(replaced_ids[:4]) + + "." + ) + if duplicate_topic_ids: + notes.append( + "Near-duplicate topical variants were removed to preserve distinct recommendation coverage: " + + ", ".join(duplicate_topic_ids[:4]) + + "." + ) + return notes + + def _enforce_shortlist_against_rerank( + self, + shortlist_ids: List[str], + ranked_candidates: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + study_intent: str, + max_results: int, + max_shortlist: int, + ) -> tuple[List[str], Dict[str, Any]]: + target_count = self._shortlist_target_count(max_results=max_results, max_shortlist=max_shortlist) + strict_top_k = min(len(ranked_candidates), max(target_count + 1, min(max_shortlist, 5))) + strict_pool = ranked_candidates[:strict_top_k] + strict_pool_ids = [row.get("phenotype_id") for row in strict_pool if row.get("phenotype_id")] + strict_pool_set = set(strict_pool_ids) + strict_pool_by_id = { + str(row.get("phenotype_id")): row + for row in strict_pool + if isinstance(row, dict) and row.get("phenotype_id") not in (None, "") + } + + blocked_candidate_reasons: Dict[str, str] = {} + preferred_pool_ids: List[str] = [] + blocked_pool_ids: List[str] = [] + for phenotype_id in strict_pool_ids: + row = strict_pool_by_id.get(str(phenotype_id)) or {} + block_reason = self._shortlist_candidate_block_reason( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + ) + if block_reason: + blocked_candidate_reasons[str(phenotype_id)] = block_reason + blocked_pool_ids.append(str(phenotype_id)) + else: + preferred_pool_ids.append(str(phenotype_id)) + + filtered_shortlist: List[str] = [] + dropped_ids: List[str] = [] + replaced_ids: List[str] = [] + 
plain_diagnosis_fill_skipped_ids: List[str] = [] + quality_threshold_skipped_ids: List[str] = [] + seen: set[str] = set() + for phenotype_id in shortlist_ids or []: + phenotype_id = str(phenotype_id) + if phenotype_id not in strict_pool_set: + if phenotype_id not in (None, ""): + dropped_ids.append(phenotype_id) + continue + if phenotype_id in blocked_candidate_reasons: + replaced_ids.append(phenotype_id) + continue + if phenotype_id not in seen: + filtered_shortlist.append(phenotype_id) + seen.add(phenotype_id) + + final_shortlist: List[str] = [] + for phenotype_id in preferred_pool_ids: + if phenotype_id not in filtered_shortlist or phenotype_id in final_shortlist: + continue + row = strict_pool_by_id.get(str(phenotype_id)) or {} + if not self._allow_plain_diagnosis_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + plain_diagnosis_fill_skipped_ids.append(str(phenotype_id)) + continue + if not self._allow_quality_threshold_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + quality_threshold_skipped_ids.append(str(phenotype_id)) + continue + final_shortlist.append(phenotype_id) + for phenotype_id in preferred_pool_ids: + if phenotype_id in final_shortlist: + continue + row = strict_pool_by_id.get(str(phenotype_id)) or {} + if not self._allow_plain_diagnosis_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + if str(phenotype_id) not in plain_diagnosis_fill_skipped_ids: + plain_diagnosis_fill_skipped_ids.append(str(phenotype_id)) + continue + if not self._allow_quality_threshold_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + if str(phenotype_id) not in quality_threshold_skipped_ids: + quality_threshold_skipped_ids.append(str(phenotype_id)) + continue + 
final_shortlist.append(phenotype_id) + if len(final_shortlist) >= target_count: + break + if not final_shortlist: + final_shortlist = preferred_pool_ids[:target_count] + + dedupe_diagnostics = { + "duplicate_topic_ids": [], + "backfilled_ids": [], + "applied": False, + } + if self._should_dedupe_shortlist(intent_facets=intent_facets, study_intent=study_intent): + final_shortlist, dedupe_diagnostics = self._dedupe_shortlist_ids( + shortlist_ids=final_shortlist, + candidate_rows_by_id=strict_pool_by_id, + backfill_ids=preferred_pool_ids, + target_count=target_count, + ) + + diagnostics = { + "strict_top_k": strict_top_k, + "strict_pool_ids": strict_pool_ids, + "planner_input_shortlist_ids": [str(pid) for pid in shortlist_ids or [] if pid not in (None, "")], + "dropped_ids": dropped_ids, + "replaced_ids": replaced_ids, + "blocked_pool_ids": blocked_pool_ids, + "blocked_candidate_reasons": blocked_candidate_reasons, + "preferred_pool_ids": preferred_pool_ids, + "plain_diagnosis_fill_skipped_ids": plain_diagnosis_fill_skipped_ids, + "quality_threshold_skipped_ids": quality_threshold_skipped_ids, + "duplicate_topic_ids": dedupe_diagnostics.get("duplicate_topic_ids") or [], + "dedupe_backfilled_ids": dedupe_diagnostics.get("backfilled_ids") or [], + "dedupe_applied": bool(dedupe_diagnostics.get("applied")), + "enforced_shortlist_ids": final_shortlist, + "enforced": final_shortlist != [str(pid) for pid in shortlist_ids or [] if pid not in (None, "")], + } + return final_shortlist, diagnostics + + def _candidate_metadata_priority( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + search_rank: int, + study_intent: str = "", + ) -> Dict[str, Any]: + topic_tokens = self._topic_tokens(intent_facets.get("condition_or_topic")) + alias_tokens_list = [ + (alias, self._topic_tokens(alias)) + for alias in (intent_facets.get("clinical_topic_aliases") or []) + if alias not in (None, "") + ] + role = self._flatten_text(row.get("phenotype_role")) + care_setting = 
self._flatten_text(intent_facets.get("care_setting")) + candidate_care_setting = self._flatten_text(row.get("care_setting_scope")) + primary_topic_tokens = self._topic_tokens(row.get("primary_clinical_topic")) + context_tokens = self._topic_tokens(row.get("target_vs_context_conditions")) + population_scope = self._flatten_text(row.get("population_scope")) + population_cue = self._flatten_text(intent_facets.get("population_cue")) + exclude_tags = self._flatten_text(row.get("exclude_from_primary_topic_match")) + source_dataset = self._flatten_text(row.get("source_dataset")) + signals_text = self._flatten_text(row.get("signals")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + short_description = self._flatten_text(row.get("short_description")) + recommendation_summary = self._flatten_text(row.get("recommendation_summary")) + retrieval_keywords = self._flatten_text(row.get("retrieval_keywords")) + combined_text = " ".join( + part for part in (name_text, short_description, recommendation_summary, signals_text, retrieval_keywords) if part + ) + procedure_focus_text = " ".join( + part for part in ( + name_text, + self._flatten_text(row.get("primary_clinical_topic")), + role, + ) if part + ) + reasons: List[Dict[str, Any]] = [] + + score = 0.0 + explicit_procedure_intent = self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets) + + topic_score = self._topic_overlap_score(topic_tokens, primary_topic_tokens) + if topic_score: + delta = topic_score * 8.0 + score += delta + reasons.append({"kind": "topic_primary", "delta": round(delta, 4), "detail": row.get("primary_clinical_topic") or ""}) + context_score = self._topic_overlap_score(topic_tokens, context_tokens) + if context_score: + delta = context_score * 2.5 + score += delta + reasons.append({"kind": "topic_context", "delta": round(delta, 4), "detail": self._compact_text_value(row.get("target_vs_context_conditions"), limit=120)}) + + alias_primary_score, 
matched_primary_alias = self._best_alias_overlap(alias_tokens_list, primary_topic_tokens) + if alias_primary_score > topic_score and matched_primary_alias: + delta = alias_primary_score * 7.0 + score += delta + reasons.append({ + "kind": "dynamic_clinical_alias_match", + "delta": round(delta, 4), + "detail": {"alias": matched_primary_alias, "field": "primary_clinical_topic", "topic": row.get("primary_clinical_topic") or ""}, + }) + alias_context_score, matched_context_alias = self._best_alias_overlap(alias_tokens_list, context_tokens) + if alias_context_score > context_score and matched_context_alias: + delta = alias_context_score * 2.0 + score += delta + reasons.append({ + "kind": "dynamic_clinical_alias_context", + "delta": round(delta, 4), + "detail": {"alias": matched_context_alias, "field": "target_vs_context_conditions"}, + }) + + best_topic_score = max(topic_score, alias_primary_score) + best_context_score = max(context_score, alias_context_score) + if topic_tokens and best_topic_score <= 0.0 and best_context_score > 0.0: + score -= 3.0 + reasons.append({"kind": "context_without_primary", "delta": -3.0, "detail": "topic only matched context fields"}) + + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if topic_tokens and best_topic_score <= 0.0 and best_context_score <= 0.0: + score -= 8.0 + reasons.append({"kind": "topic_mismatch", "delta": -8.0, "detail": row.get("primary_clinical_topic") or ""}) + if intent_role == "diagnosis": + if "diagnos" in role or role in {"condition", "case"}: + score += 4.0 + reasons.append({"kind": "role_match", "delta": 4.0, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("procedure", "surgery", "repair")): + score -= 4.5 + reasons.append({"kind": "role_penalty_procedure", "delta": -4.5, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("severity", "complication", "outcome", "screen", "risk_score")): + score -= 3.0 + reasons.append({"kind": 
"role_penalty_non_diagnosis", "delta": -3.0, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("covariate", "comorbid")): + score -= 3.5 + reasons.append({"kind": "role_penalty_covariate", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + if "visit" in role: + score -= 2.5 + reasons.append({"kind": "role_penalty_visit", "delta": -2.5, "detail": row.get("phenotype_role") or ""}) + if (not explicit_procedure_intent) and any(token in procedure_focus_text for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative")): + score -= 6.0 + reasons.append({"kind": "disease_vs_procedure_mismatch", "delta": -6.0, "detail": row.get("name") or row.get("primary_clinical_topic") or ""}) + if source_dataset == "ohdsi_phenotype_library" and any(token in procedure_focus_text for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative")): + score -= 2.0 + reasons.append({"kind": "native_ohdsi_cannot_override_procedure", "delta": -2.0, "detail": row.get("source_dataset") or ""}) + + if intent_role == "medication_based": + medication_text = any(token in combined_text for token in ("medication", "drug", "med codes", "insulin", "metformin", "antidiabetic", "meglitinide", "prescription", "therapy")) + medication_signal = "has_code_system:medication" in signals_text or medication_text + if "medication" in role or "drug" in role: + score += 8.0 + reasons.append({"kind": "role_match_medication", "delta": 8.0, "detail": row.get("phenotype_role") or ""}) + elif "diagnos" in role or role in {"condition", "case"}: + score -= 6.0 + reasons.append({"kind": "role_penalty_plain_diagnosis", "delta": -6.0, "detail": row.get("phenotype_role") or ""}) + elif any(token in role for token in ("covariate", "comorbid")): + score -= 3.5 + reasons.append({"kind": "role_penalty_covariate_for_medication", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + if medication_signal: + score += 4.5 + 
reasons.append({"kind": "medication_evidence", "delta": 4.5, "detail": row.get("name") or row.get("short_description") or ""}) + else: + score -= 4.0 + reasons.append({"kind": "missing_medication_evidence", "delta": -4.0, "detail": row.get("name") or row.get("short_description") or ""}) + if any(token in role for token in ("procedure", "screen", "severity", "outcome")): + score -= 3.5 + reasons.append({"kind": "role_penalty_non_medication", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + + if care_setting and care_setting != "any": + if candidate_care_setting and care_setting in candidate_care_setting: + score += 2.0 + reasons.append({"kind": "care_setting_match", "delta": 2.0, "detail": row.get("care_setting_scope") or ""}) + elif candidate_care_setting and candidate_care_setting not in {"any", "unspecified"}: + score -= 1.5 + reasons.append({"kind": "care_setting_penalty", "delta": -1.5, "detail": row.get("care_setting_scope") or ""}) + + if population_cue and population_scope: + if "veteran" in population_cue and "veteran" in population_scope: + score += 1.0 + reasons.append({"kind": "population_match_veteran", "delta": 1.0, "detail": row.get("population_scope") or ""}) + if "va" in population_cue and "va" in population_scope: + score += 1.0 + reasons.append({"kind": "population_match_va", "delta": 1.0, "detail": row.get("population_scope") or ""}) + if "va" in population_cue and "va_cipher" in source_dataset: + score += 0.75 + reasons.append({"kind": "source_match_va", "delta": 0.75, "detail": row.get("source_dataset") or ""}) + + if "context" in exclude_tags: + score -= 2.0 + reasons.append({"kind": "exclude_context", "delta": -2.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if "comorbid" in exclude_tags or "covariate" in exclude_tags: + score -= 3.0 + reasons.append({"kind": "exclude_comorbidity", "delta": -3.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if any(token in exclude_tags for token in 
("procedure", "surgery", "post-op", "postop")): + score -= 4.0 + reasons.append({"kind": "exclude_procedure", "delta": -4.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if any(token in exclude_tags for token in ("severity", "complication", "outcome", "screen")): + score -= 2.5 + reasons.append({"kind": "exclude_non_diagnosis", "delta": -2.5, "detail": row.get("exclude_from_primary_topic_match") or []}) + + if "withdrawn" in signals_text or "[w]" in name_text: + score -= 12.0 + reasons.append({"kind": "status_withdrawn", "delta": -12.0, "detail": row.get("signals") or row.get("name") or ""}) + if "prediction" in signals_text or "prediction" in name_text: + score -= 4.0 + reasons.append({"kind": "status_prediction", "delta": -4.0, "detail": row.get("signals") or row.get("name") or ""}) + if "screening" in role or "screening" in name_text: + score -= 2.5 + reasons.append({"kind": "screening_penalty", "delta": -2.5, "detail": row.get("name") or row.get("phenotype_role") or ""}) + + readiness_delta = float(row.get("execution_readiness_score") or 0.0) * 0.25 + score += readiness_delta + reasons.append({"kind": "execution_readiness", "delta": round(readiness_delta, 4), "detail": row.get("execution_readiness_score")}) + rank_delta = max(0.0, 5.0 - float(search_rank)) * 0.02 + score += rank_delta + reasons.append({"kind": "search_rank_tiebreak", "delta": round(rank_delta, 4), "detail": search_rank}) + + return { + "metadata_score": score, + "retrieval_score": float(row.get("score") or 0.0), + "reasons": reasons, + } + + def _rerank_planning_candidates( + self, + candidates: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + study_intent: str = "", + ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + ranked_rows: List[tuple[float, float, int, Dict[str, Any], Dict[str, Any]]] = [] + for index, row in enumerate(candidates): + if not isinstance(row, dict): + continue + priority = self._candidate_metadata_priority( + row=row, + 
intent_facets=intent_facets, + search_rank=index, + study_intent=study_intent, + ) + metadata_score = float(priority.get("metadata_score") or 0.0) + retrieval_score = float(priority.get("retrieval_score") or 0.0) + ranked_rows.append((metadata_score, retrieval_score, -index, row, priority)) + ranked_rows.sort(reverse=True) + ranked_candidates: List[Dict[str, Any]] = [] + rerank_diagnostics: List[Dict[str, Any]] = [] + for rank_index, (metadata_score, retrieval_score, original_position, row, priority) in enumerate(ranked_rows, start=1): + ranked_candidates.append(row) + rerank_diagnostics.append( + { + "rank": rank_index, + "original_rank": (-original_position) + 1, + "phenotype_id": row.get("phenotype_id"), + "name": row.get("name") or row.get("phenotype_name") or "", + "metadata_score": round(metadata_score, 4), + "retrieval_score": round(retrieval_score, 4), + "phenotype_role": row.get("phenotype_role") or "", + "primary_clinical_topic": row.get("primary_clinical_topic") or "", + "care_setting_scope": row.get("care_setting_scope") or "", + "exclude_from_primary_topic_match": row.get("exclude_from_primary_topic_match") or [], + "reasons": priority.get("reasons") or [], + } + ) + return ranked_candidates, rerank_diagnostics + + def _validate_final_recommendation_payload( + self, + llm_payload: Optional[Dict[str, Any]], + catalog_rows: List[Dict[str, Any]], + ) -> tuple[Optional[Dict[str, Any]], Dict[str, Any]]: + diagnostics: Dict[str, Any] = { + "rejected": False, + "reason": None, + "invalid_ids": [], + "duplicate_ids": [], + "allowed_ids": [row.get("phenotype_id") for row in catalog_rows if row.get("phenotype_id")], + } + if not isinstance(llm_payload, dict): + return llm_payload, diagnostics + + raw_recs = llm_payload.get("phenotype_recommendations") + if not isinstance(raw_recs, list): + diagnostics["rejected"] = True + diagnostics["reason"] = "missing_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + 
if not raw_recs: + diagnostics["rejected"] = True + diagnostics["reason"] = "empty_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + allowed_set = set(diagnostics["allowed_ids"]) + seen: set[str] = set() + invalid_ids: List[str] = [] + duplicate_ids: List[str] = [] + valid_unique = 0 + + for rec in raw_recs: + if not isinstance(rec, dict): + continue + phenotype_id = rec.get("phenotype_id") + if phenotype_id in (None, ""): + continue + phenotype_id = str(phenotype_id) + if phenotype_id not in allowed_set: + invalid_ids.append(phenotype_id) + continue + if phenotype_id in seen: + duplicate_ids.append(phenotype_id) + continue + seen.add(phenotype_id) + valid_unique += 1 + + diagnostics["invalid_ids"] = sorted(set(invalid_ids)) + diagnostics["duplicate_ids"] = sorted(set(duplicate_ids)) + diagnostics["valid_unique_count"] = valid_unique + if diagnostics["invalid_ids"] or diagnostics["duplicate_ids"] or valid_unique <= 0: + diagnostics["rejected"] = True + if diagnostics["invalid_ids"]: + diagnostics["reason"] = "invalid_ids" + elif diagnostics["duplicate_ids"]: + diagnostics["reason"] = "duplicate_ids" + else: + diagnostics["reason"] = "no_valid_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + return llm_payload, diagnostics + + def _build_compact_final_candidates(self, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + compact_rows: List[Dict[str, Any]] = [] + for row in candidates or []: + if not isinstance(row, dict): + continue + compact_rows.append( + { + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), + "name": row.get("name") or row.get("phenotype_name") or "", + "short_description": row.get("short_description") or "", + "primary_clinical_topic": row.get("primary_clinical_topic") or "", + "phenotype_role": row.get("phenotype_role") or "", + "care_setting_scope": row.get("care_setting_scope") 
or "", + "population_scope": row.get("population_scope") or "", + "recommendation_summary": row.get("recommendation_summary") or "", + "executable_definition_status": row.get("executable_definition_status") or "", + "execution_readiness_score": row.get("execution_readiness_score"), + "score": row.get("score"), + } + ) + return compact_rows + + def _default_final_recommendation_plan(self, study_intent: str) -> str: + return "Rank phenotypes matching the study intent." + + def _default_final_recommendation_justification(self, row: Dict[str, Any]) -> str: + phenotype_role = self._flatten_text(row.get("phenotype_role")).replace("_", " ") or "phenotype" + name = row.get("phenotype_name") or row.get("name") or "selected phenotype" + justification = f"Selected from the top reranked shortlisted candidates as a clinically aligned {phenotype_role} match." + if len(justification) > 200: + return "Selected from the top reranked shortlisted candidates as a clinically aligned match." + return justification + + def _build_deterministic_final_payload( + self, + llm_payload: Optional[Dict[str, Any]], + catalog_rows: List[Dict[str, Any]], + max_results: int, + study_intent: str, + ) -> tuple[Dict[str, Any], Dict[str, Any]]: + selected_rows = [row for row in catalog_rows[: max(0, max_results)] if isinstance(row, dict)] + selected_ids = [str(row.get("phenotype_id")) for row in selected_rows if row.get("phenotype_id") not in (None, "")] + selected_set = set(selected_ids) + explanation_by_id: Dict[str, Dict[str, Any]] = {} + duplicate_ids: List[str] = [] + invalid_ids: List[str] = [] + + if isinstance(llm_payload, dict): + raw_recs = llm_payload.get("phenotype_recommendations") + if isinstance(raw_recs, list): + for rec in raw_recs: + if not isinstance(rec, dict): + continue + phenotype_id = rec.get("phenotype_id") + if phenotype_id in (None, ""): + continue + phenotype_id = str(phenotype_id) + if phenotype_id not in selected_set: + invalid_ids.append(phenotype_id) + continue + if 
phenotype_id in explanation_by_id: + duplicate_ids.append(phenotype_id) + continue + explanation_by_id[phenotype_id] = rec + + recommendations: List[Dict[str, Any]] = [] + matched_ids: List[str] = [] + defaulted_ids: List[str] = [] + for row in selected_rows: + phenotype_id = str(row.get("phenotype_id") or "") + if not phenotype_id: + continue + llm_rec = explanation_by_id.get(phenotype_id) or {} + justification = llm_rec.get("justification") if isinstance(llm_rec.get("justification"), str) else "" + confidence = llm_rec.get("confidence") + if not justification.strip(): + justification = self._default_final_recommendation_justification(row) + defaulted_ids.append(phenotype_id) + else: + matched_ids.append(phenotype_id) + if not isinstance(confidence, (int, float)): + confidence = None + recommendations.append( + { + "phenotype_id": phenotype_id, + "phenotype_name": row.get("phenotype_name") or row.get("name") or "", + "justification": justification[:200], + "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, + } + ) + + plan = "" + if isinstance(llm_payload, dict) and isinstance(llm_payload.get("plan"), str): + plan = llm_payload.get("plan") or "" + if not plan.strip(): + plan = self._default_final_recommendation_plan(study_intent) + + payload = { + "plan": plan[:300], + "phenotype_recommendations": recommendations, + } + diagnostics = { + "selected_ids": selected_ids, + "matched_llm_ids": matched_ids, + "defaulted_ids": defaulted_ids, + "invalid_llm_ids": sorted(set(invalid_ids)), + "duplicate_llm_ids": sorted(set(duplicate_ids)), + "used_llm_justification_count": len(matched_ids), + "used_default_justification_count": len(defaulted_ids), + } + return payload, diagnostics + def list_tools(self) -> List[Dict[str, Any]]: if self._mcp_client is not None: return self._mcp_client.list_tools() @@ -319,24 +1481,165 @@ def run_phenotype_recommendation_flow( "error": "phenotype_search_failed", "details": full, } + all_candidates = 
full.get("results") or [] if candidate_limit is None: candidate_limit = int(os.getenv("LLM_CANDIDATE_LIMIT", "5")) + candidate_limit = max(0, int(candidate_limit)) pre_truncation_count = len(all_candidates) - candidates = all_candidates - if candidate_limit > 0: - candidates = candidates[:candidate_limit] self._log_debug( - "phenotype_recommendation: candidate counts " - f"before={pre_truncation_count} after={len(candidates)} limit={candidate_limit}" + "phenotype_recommendation: search candidate counts " + f"before={pre_truncation_count} shortlist_limit={candidate_limit}" ) - self._log_debug("phenotype_recommendation: prompt bundle fetch start") + self._log_debug("phenotype_recommendation: intent prompt bundle fetch start") + intent_prompt_bundle = self.call_tool( + name="phenotype_prompt_bundle", + arguments={"task": "phenotype_recommendation_intent_facets"}, + ) + self._log_debug( + f"phenotype_recommendation: intent prompt bundle fetch end status={intent_prompt_bundle.get('status')}" + ) + intent_prompt_full = intent_prompt_bundle.get("full_result") or {} + if intent_prompt_bundle.get("status") != "ok" or intent_prompt_full.get("error"): + return { + "status": "error", + "error": "phenotype_prompt_bundle_failed", + "details": intent_prompt_bundle, + } + + intent_prompt = build_recommendation_intent_facets_prompt( + overview=intent_prompt_full.get("overview", ""), + spec=intent_prompt_full.get("spec", ""), + output_schema=intent_prompt_full.get("output_schema", {}), + study_intent=study_intent, + ) + self._log_debug(f"phenotype_recommendation: intent llm start prompt_chars={len(intent_prompt)}") + intent_llm_result = self._call_llm( + intent_prompt, + required_keys=["plan", "intent_facets", "reasoning_notes"], + ) + self._log_debug( + "phenotype_recommendation: intent llm end " + f"status={intent_llm_result.status} seconds={intent_llm_result.duration_seconds:.2f} parse_stage={intent_llm_result.parse_stage}" + ) + intent_payload = 
llm_result_payload(intent_llm_result) or {} + raw_intent_facets = intent_payload.get("intent_facets") + intent_facets = raw_intent_facets if isinstance(raw_intent_facets, dict) else {} + effective_intent_facets = self._effective_intent_facets(study_intent=study_intent, intent_facets=intent_facets) + raw_intent_notes = intent_payload.get("reasoning_notes") + if isinstance(raw_intent_notes, list): + intent_reasoning_notes = [str(note) for note in raw_intent_notes if note not in (None, "")] + elif isinstance(raw_intent_notes, str) and raw_intent_notes.strip(): + intent_reasoning_notes = [raw_intent_notes.strip()] + else: + intent_reasoning_notes = [] + intent_result = { + "plan": str(intent_payload.get("plan") or "Extract recommendation intent facets from the study intent."), + "intent_facets": intent_facets, + "reasoning_notes": intent_reasoning_notes, + "mode": "llm" if intent_payload else "stub", + } + + self._log_debug("phenotype_recommendation: plan prompt bundle fetch start") + plan_prompt_bundle = self.call_tool( + name="phenotype_prompt_bundle", + arguments={"task": "phenotype_recommendation_plan"}, + ) + self._log_debug( + f"phenotype_recommendation: plan prompt bundle fetch end status={plan_prompt_bundle.get('status')}" + ) + plan_prompt_full = plan_prompt_bundle.get("full_result") or {} + if plan_prompt_bundle.get("status") != "ok" or plan_prompt_full.get("error"): + return { + "status": "error", + "error": "phenotype_prompt_bundle_failed", + "details": plan_prompt_bundle, + } + + planning_window = int(os.getenv("LLM_PLANNING_CANDIDATE_LIMIT", str(max(candidate_limit, 12)))) + planning_window = max(candidate_limit, planning_window) + planning_window = min(max(0, planning_window), len(all_candidates)) + planning_seed_candidates = all_candidates[:planning_window] + planning_candidate_ids = [row.get("phenotype_id") for row in planning_seed_candidates if row.get("phenotype_id")] + planning_hydrated = self._hydrate_phenotype_summaries(planning_candidate_ids, 
planning_seed_candidates) + planning_ranked, planning_rerank_diagnostics = self._rerank_planning_candidates( + planning_hydrated, + effective_intent_facets, + study_intent=study_intent, + ) + planning_top_band = int(os.getenv("LLM_PLANNING_TOP_BAND", str(max((max_results or 0) + 2, 5)))) + planning_top_band = max(1, min(planning_top_band, len(planning_ranked))) if planning_ranked else 0 + planner_allowed_candidates = planning_ranked[:planning_top_band] if planning_top_band else [] + planning_candidates = self._build_compact_planning_candidates(planner_allowed_candidates) + self._log_debug( + "phenotype_recommendation: planning hydration " + f"candidates={len(planning_candidate_ids)} hydrated={len(planning_hydrated)} planner_allowed={len(planning_candidates)}" + ) + + plan_prompt = build_prompt( + overview=plan_prompt_full.get("overview", ""), + spec=plan_prompt_full.get("spec", ""), + output_schema=plan_prompt_full.get("output_schema", {}), + study_intent=study_intent, + candidates=planning_candidates, + max_results=max_results, + task="phenotype_recommendation_plan", + extra_dynamic={ + "maxShortlist": candidate_limit, + "intent_facets": effective_intent_facets, + }, + ) + self._log_debug( + f"phenotype_recommendation: plan llm start prompt_chars={len(plan_prompt)} candidate_count={len(planning_candidates)}" + ) + plan_llm_result = self._call_llm( + plan_prompt, + required_keys=["plan", "intent_facets", "shortlist_ids", "needs_more_search", "reasoning_notes"], + ) + self._log_debug( + "phenotype_recommendation: plan llm end " + f"status={plan_llm_result.status} seconds={plan_llm_result.duration_seconds:.2f} parse_stage={plan_llm_result.parse_stage}" + ) + plan_llm_payload = llm_result_payload(plan_llm_result) + planning = phenotype_recommendation_plan( + study_intent=study_intent, + catalog_rows=planning_candidates, + max_shortlist=candidate_limit, + llm_result=plan_llm_payload, + ) + + planner_shortlist_ids = planning.get("shortlist_ids") or [] + shortlist_ids, 
shortlist_enforcement = self._enforce_shortlist_against_rerank( + shortlist_ids=planner_shortlist_ids, + ranked_candidates=planning_ranked, + intent_facets=effective_intent_facets, + study_intent=study_intent, + max_results=max_results, + max_shortlist=candidate_limit, + ) + if shortlist_enforcement.get("enforced"): + planning["shortlist_ids"] = shortlist_ids + hydrated_candidates = self._hydrate_phenotype_summaries(shortlist_ids, all_candidates) + planning["reasoning_notes"] = self._build_shortlist_reasoning_notes( + shortlist_rows=hydrated_candidates, + intent_facets=effective_intent_facets, + shortlist_enforcement=shortlist_enforcement, + ) + self._log_debug( + "phenotype_recommendation: candidate hydration " + f"shortlist={len(shortlist_ids)} hydrated={len(hydrated_candidates)}" + ) + + selected_candidates = [row for row in hydrated_candidates[: max(0, max_results)] if isinstance(row, dict)] + compact_final_candidates = self._build_compact_final_candidates(selected_candidates) + + self._log_debug("phenotype_recommendation: final prompt bundle fetch start") prompt_bundle = self.call_tool( name="phenotype_prompt_bundle", arguments={"task": "phenotype_recommendations"}, ) - self._log_debug(f"phenotype_recommendation: prompt bundle fetch end status={prompt_bundle.get('status')}") + self._log_debug(f"phenotype_recommendation: final prompt bundle fetch end status={prompt_bundle.get('status')}") prompt_full = prompt_bundle.get("full_result") or {} if prompt_bundle.get("status") != "ok" or prompt_full.get("error"): return { @@ -345,61 +1648,347 @@ def run_phenotype_recommendation_flow( "details": prompt_bundle, } - prompt = build_prompt( + final_prompt = build_prompt( overview=prompt_full.get("overview", ""), spec=prompt_full.get("spec", ""), output_schema=prompt_full.get("output_schema", {}), study_intent=study_intent, - candidates=candidates, + candidates=compact_final_candidates, max_results=max_results, + task="phenotype_recommendations", + 
extra_dynamic={"intent_facets": effective_intent_facets}, ) self._log_debug( - f"phenotype_recommendation: llm start prompt_chars={len(prompt)} candidate_count={len(candidates)}" + f"phenotype_recommendation: final llm start prompt_chars={len(final_prompt)} candidate_count={len(compact_final_candidates)}" ) - llm_result = self._call_llm(prompt, required_keys=["plan", "phenotype_recommendations"]) + llm_result = self._call_llm(final_prompt, required_keys=["plan", "phenotype_recommendations"]) self._log_debug( - "phenotype_recommendation: llm end " + "phenotype_recommendation: final llm end " f"status={llm_result.status} seconds={llm_result.duration_seconds:.2f} parse_stage={llm_result.parse_stage}" ) + catalog_rows = [] - for row in candidates: + for row in selected_candidates: if not isinstance(row, dict): continue catalog_rows.append( { - "cohortId": row.get("cohortId"), - "cohortName": row.get("name") or "", + "phenotype_id": row.get("phenotype_id"), + "phenotype_name": row.get("name") or row.get("phenotype_name") or "", + "name": row.get("name") or row.get("phenotype_name") or "", "short_description": row.get("short_description"), + "primary_clinical_topic": row.get("primary_clinical_topic"), + "phenotype_role": row.get("phenotype_role"), } ) llm_payload = llm_result_payload(llm_result) + validated_llm_payload, final_validation = self._validate_final_recommendation_payload(llm_payload, catalog_rows) + if final_validation.get("rejected"): + self._log_debug( + "phenotype_recommendation: final validation rejected " + f"reason={final_validation.get('reason')} invalid_ids={final_validation.get('invalid_ids')} duplicates={final_validation.get('duplicate_ids')}" + ) + deterministic_llm_payload, final_deterministic = self._build_deterministic_final_payload( + llm_payload=llm_payload, + catalog_rows=catalog_rows, + max_results=max_results, + study_intent=study_intent, + ) + effective_final_payload = None if llm_payload is None else deterministic_llm_payload core_result = 
phenotype_recommendations( protocol_text=study_intent, catalog_rows=catalog_rows, max_results=max_results, - llm_result=llm_payload, + llm_result=effective_final_payload, ) - llm_used = llm_payload is not None - fallback_reason = None if llm_used else self._fallback_reason_for_llm(llm_result) - fallback_mode = None if llm_used else core_result.get("mode") + llm_used = bool(final_deterministic.get("used_llm_justification_count")) + if llm_used: + fallback_reason = None + fallback_mode = None + else: + fallback_reason = self._fallback_reason_for_llm(llm_result) if llm_payload is None else "llm_explanations_unusable" + fallback_mode = "stub" if llm_payload is None else core_result.get("mode") if fallback_reason: self._log_debug(f"phenotype_recommendation: fallback chosen reason={fallback_reason} mode={fallback_mode}") + final_diagnostics = self._llm_diagnostics(llm_result) + planning_diagnostics = self._llm_diagnostics(plan_llm_result) + intent_diagnostics = self._llm_diagnostics(intent_llm_result) + diagnostics = dict(final_diagnostics) + diagnostics["intent_facets"] = intent_diagnostics + diagnostics["planning"] = planning_diagnostics + diagnostics["planning_rerank"] = { + "intent_facets_raw": intent_facets, + "intent_facets_effective": effective_intent_facets, + "candidate_count": len(planning_rerank_diagnostics), + "planner_allowed_count": len(planning_candidates), + "planner_allowed_ids": [row.get("phenotype_id") for row in planner_allowed_candidates if row.get("phenotype_id")], + "shortlist_enforcement": shortlist_enforcement, + "candidates": planning_rerank_diagnostics, + } + diagnostics["final_validation"] = final_validation + diagnostics["final_deterministic"] = final_deterministic + diagnostics["final"] = final_diagnostics + return { "status": "ok", "search": full, + "intent_facets": intent_result, + "planning": planning, "llm_used": llm_used, "llm_status": llm_result.status, "fallback_reason": fallback_reason, "fallback_mode": fallback_mode, 
"candidate_limit": candidate_limit, "candidate_offset": candidate_offset or 0, - "candidate_count": len(candidates), + "candidate_count": len(hydrated_candidates), "candidate_count_before_truncation": pre_truncation_count, - "prompt_length_chars": len(prompt), + "plan_prompt_length_chars": len(plan_prompt), + "prompt_length_chars": len(final_prompt), "recommendations": core_result, - "diagnostics": self._llm_diagnostics(llm_result), + "diagnostics": diagnostics, + } + + def run_phenotype_definition_flow( + self, + phenotype_id: str, + ) -> Dict[str, Any]: + phenotype_id = str(phenotype_id or "").strip() + if not phenotype_id: + return {"status": "error", "error": "missing phenotype_id"} + if self._mcp_client is None: + return {"status": "error", "error": "MCP client unavailable"} + + summary_result = self.call_tool( + name="phenotype_fetch_summary", + arguments={"phenotype_id": phenotype_id}, + ) + summary_full = summary_result.get("full_result") or {} + summary_payload: Dict[str, Any] = {} + if isinstance(summary_full.get("summary"), dict): + summary_payload = dict(summary_full.get("summary") or {}) + elif isinstance(summary_full.get("content"), dict): + summary_payload = dict(summary_full.get("content") or {}) + elif isinstance(summary_full, dict) and summary_full.get("phenotype_id") == phenotype_id: + summary_payload = dict(summary_full) + + definition_result = self.call_tool( + name="phenotype_fetch_definition", + arguments={"phenotype_id": phenotype_id, "truncate": False}, + ) + definition_full = definition_result.get("full_result") or {} + definition_payload: Dict[str, Any] = {} + if isinstance(definition_full.get("definition"), dict): + definition_payload = dict(definition_full.get("definition") or {}) + elif isinstance(definition_full.get("content"), dict): + definition_payload = dict(definition_full.get("content") or {}) + elif isinstance(definition_full, dict) and definition_full: + definition_payload = dict(definition_full) + + if 
definition_result.get("status") != "ok" or definition_full.get("error") or not definition_payload: + return { + "status": "error", + "error": "phenotype_definition_fetch_failed", + "details": definition_result, + } + if summary_result.get("status") != "ok" or summary_full.get("error"): + return { + "status": "error", + "error": "phenotype_summary_fetch_failed", + "details": summary_result, + } + + document = { + "phenotype_id": phenotype_id, + "phenotype_name": summary_payload.get("name") or summary_payload.get("phenotype_name") or phenotype_id, + "source_dataset": summary_payload.get("source_dataset") or "", + "source_record_type": summary_payload.get("source_record_type") or "", + "catalog_metadata": summary_payload, + "definition": definition_payload, + "assembled_from": { + "catalog_metadata_source": "catalog.jsonl via phenotype_fetch_summary", + "definition_source": "definitions/ via phenotype_fetch_definition", + }, + } + return { + "status": "ok", + "phenotype_id": phenotype_id, + "document": document, + } + + def run_cohort_methods_specs_recommendation_flow( + self, + analytic_settings_description: str, + study_intent: str = "", + ) -> Dict[str, Any]: + import re as _re + + from study_agent_core.cohort_methods_spec_validation import ( + LLM_FILLED_SECTIONS, + backfill_section_from_defaults, + cohort_methods_spec_to_shell_recommendation, + validate_section, + validate_cohort_methods_spec, + ) + + if self._mcp_client is None: + raise RuntimeError("MCP client unavailable") + + bundle = self.call_tool(name="cohort_methods_prompt_bundle", arguments={}) + if bundle.get("status") != "ok": + raise RuntimeError(f"cohort_methods_prompt_bundle failed: {bundle}") + bundle_full = bundle.get("full_result") or {} + defaults_spec: Dict[str, Any] = bundle_full.get("defaults_spec", {}) + analysis_template: str = ( + bundle_full.get("analysis_specifications_template") + or bundle_full.get("annotated_template", "") + ) + json_field_descriptions: str = 
bundle_full.get("json_field_descriptions", "") + instruction: str = bundle_full.get("instruction_template", "") + output_style: str = bundle_full.get("output_style_template", "") + + defaults_snapshot: Dict[str, Any] = {} + input_method = "typed_text" + profile_name_default = "Recommended from free-text description" + + diagnostics: Dict[str, Any] = { + "llm_parse_stage": "ok", + "schema_valid": True, + "failed_sections": [], + "latency_ms": 0, + } + + def _fallback(status: str, *, reason: Optional[str] = None) -> Dict[str, Any]: + recommendation = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec=defaults_spec, + raw_description=analytic_settings_description or "", + defaults_snapshot=defaults_snapshot, + profile_name=defaults_spec.get("description") or defaults_spec.get("name") or profile_name_default, + input_method=input_method, + rec_status="backfilled", + ) + if reason: + diagnostics["reason"] = reason + diagnostics["schema_valid"] = False + return { + "status": status, + "recommendation": recommendation, + "cohort_methods_specifications": defaults_spec, + "section_rationales": {s: {"rationale": "", "confidence": "low"} for s in LLM_FILLED_SECTIONS}, + "diagnostics": diagnostics, + } + + if not analytic_settings_description or not analytic_settings_description.strip(): + diagnostics["llm_parse_stage"] = "json_extract_failed" + return _fallback("llm_parse_error", reason="analytic_settings_description is required") + + prompt_parts = [ + instruction, + "", + "", + analytic_settings_description.strip(), + "", + "", + "", + (study_intent or "").strip(), + "", + "", + "", + analysis_template, + "", + "", + "", + json_field_descriptions, + "", + "", + output_style, + ] + prompt = "\n".join(prompt_parts) + + llm_result = self._call_llm(prompt, required_keys=["specifications", "sectionRationales"]) + diagnostics.update(self._llm_diagnostics(llm_result)) + + payload: Optional[Dict[str, Any]] = getattr(llm_result, "parsed_content", None) + if payload 
is None: + extract_source = getattr(llm_result, "content_text", None) or getattr(llm_result, "raw_response", None) or "" + match = _re.search(r"```json\s*(\{.*?\})\s*```", extract_source, flags=_re.DOTALL) + if match: + try: + payload = json.loads(match.group(1)) + except Exception: + payload = None + diagnostics["llm_parse_stage"] = "json_decode_failed" + else: + diagnostics["llm_parse_stage"] = "json_extract_failed" + + if not isinstance(payload, dict) or "specifications" not in payload: + return _fallback("llm_parse_error") + + spec = payload.get("specifications") or {} + ok_top, missing = validate_cohort_methods_spec(spec) + if not ok_top: + diagnostics["llm_parse_stage"] = "schema_validation_failed" + diagnostics["missing_keys"] = missing + return _fallback("schema_validation_error") + + rationale_section_map = { + "getDbCohortMethodDataArgs": "study_population", + "createStudyPopArgs": "study_population", + "propensityScoreAdjustment": "propensity_score_adjustment", + "fitOutcomeModelArgs": "outcome_model", + } + rationales_in = payload.get("sectionRationales") or {} + rationales_out: Dict[str, Dict[str, Any]] = {} + for rationale_section in ("study_population", "time_at_risk", "propensity_score_adjustment", "outcome_model"): + incoming = rationales_in.get(rationale_section) if isinstance(rationales_in, dict) else None + if isinstance(incoming, dict): + rationales_out[rationale_section] = { + "rationale": str(incoming.get("rationale", "")), + "confidence": incoming.get("confidence", "low") if incoming.get("confidence") in {"high", "medium", "low"} else "low", + } + else: + rationales_out[rationale_section] = {"rationale": "", "confidence": "low"} + + for section in LLM_FILLED_SECTIONS: + rationale_section = rationale_section_map.get(section, section) + + section_value = spec.get(section) + if section == "propensityScoreAdjustment" and section not in spec: + section_value = { + "trimByPsArgs": spec.get("trimByPsArgs"), + "matchOnPsArgs": 
spec.get("matchOnPsArgs"), + "stratifyByPsArgs": spec.get("stratifyByPsArgs"), + "createPsArgs": spec.get("createPsArgs"), + } + ok_sec, violations = validate_section(section, section_value) + if not ok_sec: + if section == "propensityScoreAdjustment" and section not in defaults_spec: + for ps_section in ("trimByPsArgs", "matchOnPsArgs", "stratifyByPsArgs", "createPsArgs"): + spec[ps_section] = deepcopy(defaults_spec.get(ps_section)) + else: + spec = backfill_section_from_defaults(spec, defaults_spec, section) + diagnostics["failed_sections"].append(section) + rationales_out[rationale_section] = { + "rationale": (rationales_out[rationale_section].get("rationale") or "") + f" [backfilled: {'; '.join(violations)}]", + "confidence": "low", + } + + rec_status = "backfilled" if diagnostics["failed_sections"] else "received" + recommendation = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec=spec, + raw_description=analytic_settings_description, + defaults_snapshot=defaults_snapshot, + profile_name=spec.get("description") or spec.get("name") or profile_name_default, + input_method=input_method, + rec_status=rec_status, + ) + return { + "status": "ok", + "recommendation": recommendation, + "cohort_methods_specifications": spec, + "section_rationales": rationales_out, + "diagnostics": diagnostics, } def run_phenotype_recommendation_advice_flow( @@ -498,6 +2087,75 @@ def run_phenotype_intent_split_flow( "diagnostics": self._llm_diagnostics(llm_result), } + def run_cohort_methods_intent_split_flow( + self, + study_intent: str, + ) -> Dict[str, Any]: + if not study_intent: + return {"status": "error", "error": "missing study_intent"} + if self._mcp_client is None: + return {"status": "error", "error": "MCP client unavailable"} + prompt_bundle = self.call_tool( + name="cohort_methods_intent_split", + arguments={}, + ) + prompt_full = prompt_bundle.get("full_result") or {} + if prompt_bundle.get("status") != "ok" or prompt_full.get("error"): + return { + 
"status": "error", + "error": "cohort_methods_intent_split_prompt_failed", + "details": prompt_bundle, + } + + prompt = build_cohort_methods_intent_split_prompt( + overview=prompt_full.get("overview", ""), + spec=prompt_full.get("spec", ""), + output_schema=prompt_full.get("output_schema", {}), + study_intent=study_intent, + ) + self._log_debug("cohort_methods_intent_split: calling LLM") + llm_result = self._call_llm( + prompt, + required_keys=[ + "status", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + "rationale", + ], + ) + self._log_debug( + "cohort_methods_intent_split: LLM returned " + f"status={llm_result.status} parse_stage={llm_result.parse_stage}" + ) + llm_payload = llm_result_payload(llm_result) + if llm_payload is None: + return { + "status": "error", + "error": "llm_unavailable", + "diagnostics": self._llm_diagnostics(llm_result), + } + core_result = cohort_methods_intent_split( + study_intent=study_intent, + llm_result=llm_payload, + ) + if core_result.get("error"): + return { + "status": "error", + "error": core_result.get("error"), + "details": core_result, + "diagnostics": self._llm_diagnostics(llm_result), + } + + return { + "status": "ok", + "llm_used": True, + "llm_status": llm_result.status, + "intent_split": core_result, + "diagnostics": self._llm_diagnostics(llm_result), + } + def run_phenotype_improvements_flow( self, protocol_text: str, diff --git a/acp_agent/study_agent_acp/demo_shell.py b/acp_agent/study_agent_acp/demo_shell.py index ca83e07..01a2e67 100644 --- a/acp_agent/study_agent_acp/demo_shell.py +++ b/acp_agent/study_agent_acp/demo_shell.py @@ -453,10 +453,10 @@ def _handle_recommend(self, argv: Sequence[str]) -> None: print(f"status: {result.get('status')}") print(f"recommendations: {len(recommendations)}") for idx, item in enumerate(recommendations, start=1): - cohort_id = item.get("cohortId", "") - cohort_name = item.get("cohortName") or item.get("name") or "" - reasoning = 
item.get("reason") or item.get("rationale") or "" - print(f"{idx}. cohortId={cohort_id} name={cohort_name}") + phenotype_id = item.get("phenotype_id", "") + phenotype_name = item.get("phenotype_name") or item.get("name") or "" + reasoning = item.get("reason") or item.get("rationale") or item.get("justification") or "" + print(f"{idx}. phenotype_id={phenotype_id} name={phenotype_name}") if reasoning: print(f" {reasoning}") self._print_llm_summary(result) diff --git a/acp_agent/study_agent_acp/llm_client.py b/acp_agent/study_agent_acp/llm_client.py index 4a87b9c..b96fec9 100644 --- a/acp_agent/study_agent_acp/llm_client.py +++ b/acp_agent/study_agent_acp/llm_client.py @@ -46,13 +46,17 @@ def build_prompt( study_intent: str, candidates: list[dict[str, Any]], max_results: int, + task: str = "phenotype_recommendations", + extra_dynamic: Optional[Dict[str, Any]] = None, ) -> str: dynamic = { - "task": "phenotype_recommendations", + "task": task, "study_intent": study_intent, "candidates": candidates, "maxResults": max_results, } + if extra_dynamic: + dynamic.update(extra_dynamic) strict_rules = "\n\n".join( [ "STRICT OUTPUT RULES:", @@ -60,7 +64,7 @@ def build_prompt( "Return exactly ONE JSON object that matches the output schema.", "Do NOT wrap output in markdown, code fences, or prose.", "If uncertain, return required keys with empty arrays/strings.", - "Use ONLY cohortIds from the allowed list in candidates.", + "Use ONLY phenotype_ids from the allowed list in candidates.", "Keep output under 10 KB.", ] ) @@ -143,14 +147,14 @@ def build_lint_prompt( return "\n\n".join([s for s in sections if s]) -def build_advice_prompt( +def build_recommendation_intent_facets_prompt( overview: str, spec: str, output_schema: Dict[str, Any], study_intent: str, ) -> str: dynamic = { - "task": "phenotype_recommendation_advice", + "task": "phenotype_recommendation_intent_facets", "study_intent": study_intent, } strict_rules = "\n\n".join( @@ -175,14 +179,14 @@ def build_advice_prompt( 
return "\n\n".join([s for s in sections if s]) -def build_intent_split_prompt( +def build_advice_prompt( overview: str, spec: str, output_schema: Dict[str, Any], study_intent: str, ) -> str: dynamic = { - "task": "phenotype_intent_split", + "task": "phenotype_recommendation_advice", "study_intent": study_intent, } strict_rules = "\n\n".join( @@ -192,7 +196,7 @@ def build_intent_split_prompt( "Return exactly ONE JSON object that matches the output schema.", "Do NOT wrap output in markdown, code fences, or prose.", "If uncertain, return required keys with empty arrays/strings.", - "Keep output under 6 KB.", + "Keep output under 8 KB.", ] ) sections = [ @@ -207,6 +211,82 @@ def build_intent_split_prompt( return "\n\n".join([s for s in sections if s]) +def _build_split_prompt( + overview: str, + spec: str, + output_schema: Dict[str, Any], + study_intent: str, + *, + task: str, + max_kb: int, + uncertainty_rule: str, + extra_rules: Optional[Sequence[str]] = None, +) -> str: + dynamic = { + "task": task, + "study_intent": study_intent, + } + rules = [ + "STRICT OUTPUT RULES:", + spec, + "Return exactly ONE JSON object that matches the output schema.", + "Do NOT return the output schema itself or wrap the answer inside a properties object.", + "Do NOT wrap output in markdown, code fences, or prose.", + uncertainty_rule, + ] + if extra_rules: + rules.extend(extra_rules) + rules.append(f"Keep output under {max_kb} KB.") + strict_rules = "\n\n".join(rules) + sections = [ + overview, + "OUTPUT SCHEMA (JSON):", + json.dumps(output_schema, ensure_ascii=True), + "Below is dynamic content to analyze. 
Do not act until after STRICT OUTPUT RULES.", + "DYNAMIC INPUT (JSON):", + json.dumps(dynamic, ensure_ascii=True), + strict_rules, + ] + return "\n\n".join([s for s in sections if s]) + + +def build_intent_split_prompt( + overview: str, + spec: str, + output_schema: Dict[str, Any], + study_intent: str, +) -> str: + return _build_split_prompt( + overview=overview, + spec=spec, + output_schema=output_schema, + study_intent=study_intent, + task="phenotype_intent_split", + max_kb=6, + uncertainty_rule="If uncertain, return required keys with empty arrays/strings.", + ) + + +def build_cohort_methods_intent_split_prompt( + overview: str, + spec: str, + output_schema: Dict[str, Any], + study_intent: str, +) -> str: + return _build_split_prompt( + overview=overview, + spec=spec, + output_schema=output_schema, + study_intent=study_intent, + task="cohort_methods_intent_split", + max_kb=8, + uncertainty_rule="If uncertain, set status to needs_clarification and include clarifying questions.", + extra_rules=[ + "When status is ok, target_statement, comparator_statement, and outcome_statement must be non-empty.", + ], + ) + + def build_keeper_prompt( overview: str, spec: str, @@ -279,17 +359,15 @@ def _normalize_content_text(text: Optional[str]) -> str: return normalized -def _extract_json_object(text: str) -> Optional[str]: +def _extract_json_objects(text: str) -> list[str]: if not text: - return None - start = text.find("{") - if start == -1: - return None + return [] + objects: list[str] = [] depth = 0 in_string = False escape = False - for idx in range(start, len(text)): - ch = text[idx] + start: Optional[int] = None + for idx, ch in enumerate(text): if in_string: if escape: escape = False @@ -301,28 +379,100 @@ def _extract_json_object(text: str) -> Optional[str]: if ch == '"': in_string = True elif ch == "{": + if depth == 0: + start = idx depth += 1 elif ch == "}": depth -= 1 - if depth == 0: - return text[start : idx + 1] - return None + if depth == 0 and start is not 
None: + objects.append(text[start : idx + 1]) + start = None + return objects -def _parse_json_content(text: Optional[str]) -> tuple[Optional[Dict[str, Any]], Optional[str], Optional[str]]: +def _extract_json_object(text: str) -> Optional[str]: + objects = _extract_json_objects(text) + return objects[0] if objects else None + + +def _schema_property_value(value: Any) -> bool: + return isinstance(value, dict) and any( + key in value + for key in ( + "$ref", + "additionalProperties", + "anyOf", + "const", + "enum", + "items", + "oneOf", + "properties", + "required", + "type", + ) + ) + + +def _recover_schema_wrapped_output( + parsed: Dict[str, Any], + required_keys: Sequence[str], +) -> Optional[Dict[str, Any]]: + properties = parsed.get("properties") + if not isinstance(properties, dict): + return None + if not ({"$schema", "title", "type", "required", "additionalProperties"} & set(parsed)): + return None + + recovered = { + key: value + for key, value in properties.items() + if not _schema_property_value(value) + } + if not recovered: + return None + if required_keys and not all(key in recovered for key in required_keys): + return None + return recovered + + +def _parse_json_content( + text: Optional[str], + required_keys: Optional[Sequence[str]] = None, +) -> tuple[Optional[Dict[str, Any]], Optional[str], Optional[str]]: normalized = _normalize_content_text(text) if not normalized: return None, normalized, "content_missing" - candidate = _extract_json_object(normalized) - if candidate is None: + candidates = _extract_json_objects(normalized) + if not candidates: return None, normalized, "json_brace_extract" - try: - parsed = json.loads(candidate) - except json.JSONDecodeError: - return None, normalized, "json_loads" - if not isinstance(parsed, dict): - return None, normalized, "json_not_object" - return parsed, normalized, None + required = list(required_keys or []) + parsed_objects: list[Dict[str, Any]] = [] + saw_non_object = False + saw_decode_error = False + 
for candidate in candidates: + try: + parsed_candidate = json.loads(candidate) + except json.JSONDecodeError: + saw_decode_error = True + continue + if not isinstance(parsed_candidate, dict): + saw_non_object = True + continue + recovered = _recover_schema_wrapped_output(parsed_candidate, required) + if recovered is not None: + parsed_objects.append(recovered) + parsed_objects.append(parsed_candidate) + if not parsed_objects: + if saw_decode_error: + return None, normalized, "json_loads" + if saw_non_object: + return None, normalized, "json_not_object" + return None, normalized, "json_brace_extract" + if required: + for parsed_candidate in parsed_objects: + if all(key in parsed_candidate for key in required): + return parsed_candidate, normalized, None + return parsed_objects[0], normalized, None def _is_timeout_error(exc: BaseException) -> bool: @@ -551,7 +701,10 @@ def call_llm(prompt: str, required_keys: Optional[Sequence[str]] = None) -> LLMC parse_source = content_text if parse_source is None and data is None: parse_source = raw - parsed, normalized_content, parse_error_stage = _parse_json_content(parse_source) + parsed, normalized_content, parse_error_stage = _parse_json_content( + parse_source, + required_keys=required_keys, + ) result = LLMCallResult( status="ok" if parsed is not None else "json_parse_failed", raw_response=raw, diff --git a/acp_agent/study_agent_acp/phenotype_recommendation_utils.py b/acp_agent/study_agent_acp/phenotype_recommendation_utils.py new file mode 100644 index 0000000..55ac23e --- /dev/null +++ b/acp_agent/study_agent_acp/phenotype_recommendation_utils.py @@ -0,0 +1,918 @@ +import json +import re +from typing import Any, Dict, List, Optional + +_TOPIC_TOKEN_RE = re.compile(r"[a-z0-9]+") + + +class PhenotypeRecommendationMixin: + def _compact_text_value(self, value: Any, limit: int = 180) -> str: + if value in (None, ""): + return "" + if isinstance(value, list): + text = ", ".join(str(item) for item in value if item not in (None, 
"")) + elif isinstance(value, dict): + try: + text = json.dumps(value, ensure_ascii=True, sort_keys=True) + except TypeError: + text = str(value) + else: + text = str(value) + if len(text) > limit: + return text[:limit] + f"... [truncated {len(text) - limit} chars]" + return text + + def _build_compact_planning_candidates(self, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + compact_rows: List[Dict[str, Any]] = [] + for row in candidates: + if not isinstance(row, dict): + continue + compact_rows.append( + { + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset") or "", + "name": row.get("name") or row.get("phenotype_name") or "", + "short_description": self._compact_text_value(row.get("short_description"), limit=180), + "primary_clinical_topic": self._compact_text_value(row.get("primary_clinical_topic"), limit=120), + "phenotype_role": self._compact_text_value(row.get("phenotype_role"), limit=48), + "care_setting_scope": self._compact_text_value(row.get("care_setting_scope"), limit=64), + "population_scope": self._compact_text_value(row.get("population_scope"), limit=120), + "target_vs_context_conditions": self._compact_text_value(row.get("target_vs_context_conditions"), limit=220), + "exclude_from_primary_topic_match": self._compact_text_value(row.get("exclude_from_primary_topic_match"), limit=180), + "recommendation_summary": self._compact_text_value(row.get("recommendation_summary"), limit=220), + "retrieval_keywords": (row.get("retrieval_keywords") or [])[:6], + "executable_definition_status": row.get("executable_definition_status") or "", + "execution_readiness_score": row.get("execution_readiness_score"), + "score": row.get("score"), + "score_dense": row.get("score_dense"), + "score_sparse": row.get("score_sparse"), + } + ) + return compact_rows + + def _topic_tokens(self, value: Any) -> set[str]: + if value in (None, ""): + return set() + if isinstance(value, dict): + text = " ".join(str(part) for part in 
value.values() if part not in (None, "")) + elif isinstance(value, list): + text = " ".join(str(part) for part in value if part not in (None, "")) + else: + text = str(value) + return {token for token in _TOPIC_TOKEN_RE.findall(text.lower()) if len(token) > 1} + + def _flatten_text(self, value: Any) -> str: + if value in (None, ""): + return "" + if isinstance(value, dict): + return " ".join(self._flatten_text(part) for part in value.values()) + if isinstance(value, list): + return " ".join(self._flatten_text(part) for part in value) + return str(value).strip().lower() + + def _topic_overlap_score(self, query_tokens: set[str], candidate_tokens: set[str]) -> float: + if not query_tokens or not candidate_tokens: + return 0.0 + overlap = query_tokens & candidate_tokens + if not overlap: + return 0.0 + coverage = len(overlap) / max(1, len(query_tokens)) + precision = len(overlap) / max(1, len(candidate_tokens)) + return (coverage * 2.0) + precision + + def _normalize_clinical_topic_aliases(self, study_intent: str, aliases: Any) -> List[str]: + if not isinstance(aliases, list): + return [] + original_text = self._flatten_text(study_intent) + original_tokens = self._topic_tokens(study_intent) + normalized: List[str] = [] + seen: set[str] = set() + for value in aliases: + alias = self._flatten_text(value) + if not alias or alias in seen or alias == original_text: + continue + alias_tokens = self._topic_tokens(alias) + if len(alias_tokens) < 1 or len(alias_tokens) > 8: + continue + if alias in {"disease", "condition", "diagnosis", "bleeding", "infection", "disorder", "event"}: + continue + if len(alias) > 80: + continue + if original_tokens and alias_tokens and alias_tokens == original_tokens: + continue + normalized.append(alias) + seen.add(alias) + if len(normalized) >= 5: + break + return normalized + + def _best_alias_overlap( + self, + alias_tokens_list: List[tuple[str, set[str]]], + candidate_tokens: set[str], + ) -> tuple[float, str]: + best_score = 0.0 + best_alias 
= "" + for alias, alias_tokens in alias_tokens_list: + score = self._topic_overlap_score(alias_tokens, candidate_tokens) + if score > best_score: + best_score = score + best_alias = alias + return best_score, best_alias + + def _effective_intent_facets(self, study_intent: str, intent_facets: Dict[str, Any]) -> Dict[str, Any]: + effective = dict(intent_facets or {}) + text = self._flatten_text(study_intent) + role_cues_list = [self._flatten_text(item) for item in (effective.get("role_cues") or []) if item not in (None, "")] + care_setting_cues_list = [self._flatten_text(item) for item in (effective.get("care_setting_cues") or []) if item not in (None, "")] + population_cues_list = [self._flatten_text(item) for item in (effective.get("population_cues") or []) if item not in (None, "")] + + phenotype_role = self._flatten_text(effective.get("phenotype_role")) + if phenotype_role in {"", "unknown"}: + if any(cue in {"medication", "drug", "medication_based", "drug_based"} for cue in role_cues_list): + effective["phenotype_role"] = "medication_based" + elif any(cue == "procedure" for cue in role_cues_list): + effective["phenotype_role"] = "procedure" + elif any(cue == "diagnosis" for cue in role_cues_list): + effective["phenotype_role"] = "diagnosis" + + care_setting = self._flatten_text(effective.get("care_setting")) + if care_setting in {"", "unknown", "any"}: + if any(cue == "outpatient" for cue in care_setting_cues_list): + effective["care_setting"] = "outpatient" + elif any(cue == "inpatient" for cue in care_setting_cues_list): + effective["care_setting"] = "inpatient" + elif any(cue in {"ed", "emergency"} for cue in care_setting_cues_list): + effective["care_setting"] = "ed" + + if any(phrase in text for phrase in ("medication-based", "drug-based", "based on medication", "based on medications", "based on a medication", "based on drug", "based on drugs")): + effective["phenotype_role"] = "medication_based" + if any(phrase in text for phrase in ("outpatient", 
"ambulatory", "clinic", "office visit")): + effective["care_setting"] = "outpatient" + elif any(phrase in text for phrase in ("inpatient", "hospitalized", "hospitalisation", "hospitalization", "admission", "hospital stay")): + effective["care_setting"] = "inpatient" + elif any(phrase in text for phrase in ("emergency department", "urgent care")): + effective["care_setting"] = "ed" + + population_cue = self._flatten_text(effective.get("population_cue")) + if any(cue == "veterans" or cue == "veteran" for cue in population_cues_list) and "veteran" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; veterans" if effective.get("population_cue") else "veterans") + if any(cue == "va" for cue in population_cues_list) and "va" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; va" if effective.get("population_cue") else "va") + if any(token in text for token in ("veteran", "veterans")) and "veteran" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; veterans" if effective.get("population_cue") else "veterans") + if " va " in f" {text} " and "va" not in population_cue: + effective["population_cue"] = (effective.get("population_cue") or "").strip() + ("; va" if effective.get("population_cue") else "va") + if any(token in self._flatten_text(effective.get("population_cue")) for token in ("veteran", "va")): + effective["geography_coding_preference"] = effective.get("geography_coding_preference") or "va" + + raw_aliases = ( + effective.get("clinical_topic_aliases") + or effective.get("condition_aliases") + or effective.get("topic_aliases") + or [] + ) + effective["clinical_topic_aliases"] = self._normalize_clinical_topic_aliases( + study_intent=study_intent, + aliases=raw_aliases, + ) + + return effective + + def _is_explicit_procedure_intent(self, study_intent: str, intent_facets: Dict[str, Any]) -> bool: + 
text = self._flatten_text(study_intent) + inferred_role = self._flatten_text(intent_facets.get("phenotype_role")) + if inferred_role == "procedure": + return True + return any(token in text for token in ("repair", "surgery", "surgical", "procedure", "bypass", "post op", "post-op", "postoperative")) + + def _is_explicit_hospitalization_intent(self, study_intent: str, intent_facets: Dict[str, Any]) -> bool: + text = self._flatten_text(study_intent) + care_setting = self._flatten_text(intent_facets.get("care_setting")) + if care_setting == "inpatient": + return True + return any(token in text for token in ("hospitalized", "hospitalisation", "hospitalization", "rehospitalization", "rehospitalisation", "inpatient", "admission", "hospital stay")) + + def _shortlist_target_count(self, max_results: int, max_shortlist: int) -> int: + return max(1, min(max_shortlist, max(max_results, 3))) + + def _shortlist_candidate_block_reason( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + ) -> Optional[str]: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + topic_text = self._flatten_text(row.get("primary_clinical_topic")) + role_text = self._flatten_text(row.get("phenotype_role")) + signals_text = self._flatten_text(row.get("signals")) + combined = " ".join(part for part in (name_text, topic_text, role_text, signals_text) if part) + + if "withdrawn" in combined or "[w]" in name_text: + return "withdrawn" + + if intent_role == "diagnosis": + if (not self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets)) and any( + token in combined for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative") + ): + return "procedure_for_diagnosis_intent" + if (not self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets)) and any( + token in combined for 
token in ("exacerbation", "hospitalization", "hospitalisation", "rehospitalization", "rehospitalisation") + ): + return "narrow_hospitalization_subtype_for_plain_diagnosis" + + return None + + def _candidate_topic_signature(self, row: Dict[str, Any]) -> str: + topic_text = self._flatten_text(row.get("primary_clinical_topic")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + if topic_text and name_text: + return f"{topic_text}||{name_text}" + if topic_text: + return topic_text + return name_text + + def _is_diagnosis_class_candidate(self, row: Dict[str, Any]) -> bool: + role = self._flatten_text(row.get("phenotype_role")) + if "diagnos" in role or role in {"condition", "case"}: + return True + if any(token in role for token in ("outcome", "complication", "severity", "screen", "risk_score", "visit")): + return False + if any(token in role for token in ("covariate", "comorbid")): + return True + return False + + def _allow_plain_diagnosis_fill( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + current_count: int, + ) -> bool: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if intent_role != "diagnosis": + return True + if self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets): + return True + if self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets): + return True + if current_count < 2: + return True + return self._is_diagnosis_class_candidate(row) + + def _candidate_has_defensible_topic_match(self, row: Dict[str, Any], intent_facets: Dict[str, Any], study_intent: str) -> bool: + priority = self._candidate_metadata_priority( + row=row, + intent_facets=intent_facets, + search_rank=0, + study_intent=study_intent, + ) + kinds = {reason.get("kind") for reason in (priority.get("reasons") or []) if isinstance(reason, dict)} + has_primary = "topic_primary" in kinds or "dynamic_clinical_alias_match" in 
kinds + has_context_only = "context_without_primary" in kinds and not has_primary + has_mismatch_only = "topic_mismatch" in kinds and not has_primary + return not (has_context_only or has_mismatch_only) + + def _allow_quality_threshold_fill( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + study_intent: str, + current_count: int, + ) -> bool: + if current_count < 1: + return True + if self._candidate_has_defensible_topic_match(row=row, intent_facets=intent_facets, study_intent=study_intent): + return True + return False + + def _should_dedupe_shortlist(self, intent_facets: Dict[str, Any], study_intent: str) -> bool: + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if intent_role != "diagnosis": + return False + return not self._is_explicit_hospitalization_intent(study_intent=study_intent, intent_facets=intent_facets) + + def _dedupe_shortlist_ids( + self, + shortlist_ids: List[str], + candidate_rows_by_id: Dict[str, Dict[str, Any]], + backfill_ids: List[str], + target_count: int, + ) -> tuple[List[str], Dict[str, Any]]: + deduped: List[str] = [] + seen_ids: set[str] = set() + seen_signatures: set[str] = set() + duplicate_topic_ids: List[str] = [] + + for phenotype_id in shortlist_ids or []: + phenotype_id = str(phenotype_id) + if phenotype_id in seen_ids: + continue + row = candidate_rows_by_id.get(phenotype_id) or {} + signature = self._candidate_topic_signature(row) + if signature and signature in seen_signatures: + duplicate_topic_ids.append(phenotype_id) + continue + deduped.append(phenotype_id) + seen_ids.add(phenotype_id) + if signature: + seen_signatures.add(signature) + + backfilled_ids: List[str] = [] + if duplicate_topic_ids and len(deduped) < target_count: + for phenotype_id in backfill_ids: + phenotype_id = str(phenotype_id) + if phenotype_id in seen_ids: + continue + row = candidate_rows_by_id.get(phenotype_id) or {} + signature = self._candidate_topic_signature(row) + if signature and signature in 
seen_signatures: + continue + deduped.append(phenotype_id) + seen_ids.add(phenotype_id) + backfilled_ids.append(phenotype_id) + if signature: + seen_signatures.add(signature) + if len(deduped) >= target_count: + break + + diagnostics = { + "duplicate_topic_ids": duplicate_topic_ids, + "backfilled_ids": backfilled_ids, + "applied": bool(duplicate_topic_ids), + } + return deduped, diagnostics + + def _build_shortlist_reasoning_notes( + self, + shortlist_rows: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + shortlist_enforcement: Optional[Dict[str, Any]] = None, + ) -> List[str]: + notes: List[str] = [] + topic = self._compact_text_value(intent_facets.get("condition_or_topic"), limit=80) or "the requested clinical topic" + role = self._flatten_text(intent_facets.get("phenotype_role")).replace("_", " ") or "phenotype" + notes.append(f"Selected shortlisted candidates align with {topic} as a {role}-oriented study intent.") + + for row in shortlist_rows[:3]: + if not isinstance(row, dict): + continue + name = row.get("name") or row.get("phenotype_name") or str(row.get("phenotype_id") or "candidate") + candidate_role = self._flatten_text(row.get("phenotype_role")).replace("_", " ") or "phenotype" + candidate_topic = self._compact_text_value(row.get("primary_clinical_topic"), limit=80) or name + notes.append(f"Included {name} as a {candidate_role} candidate focused on {candidate_topic}.") + + enforcement = shortlist_enforcement or {} + replaced_ids = [str(pid) for pid in (enforcement.get("replaced_ids") or []) if pid not in (None, "")] + duplicate_topic_ids = [str(pid) for pid in (enforcement.get("duplicate_topic_ids") or []) if pid not in (None, "")] + if replaced_ids: + notes.append( + "Shortlist replaced lower-quality candidates after rerank enforcement: " + + ", ".join(replaced_ids[:4]) + + "." 
+ ) + if duplicate_topic_ids: + notes.append( + "Near-duplicate topical variants were removed to preserve distinct recommendation coverage: " + + ", ".join(duplicate_topic_ids[:4]) + + "." + ) + return notes + + def _enforce_shortlist_against_rerank( + self, + shortlist_ids: List[str], + ranked_candidates: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + study_intent: str, + max_results: int, + max_shortlist: int, + ) -> tuple[List[str], Dict[str, Any]]: + target_count = self._shortlist_target_count(max_results=max_results, max_shortlist=max_shortlist) + strict_top_k = min(len(ranked_candidates), max(target_count + 1, min(max_shortlist, 5))) + strict_pool = ranked_candidates[:strict_top_k] + strict_pool_ids = [row.get("phenotype_id") for row in strict_pool if row.get("phenotype_id")] + strict_pool_set = set(strict_pool_ids) + strict_pool_by_id = { + str(row.get("phenotype_id")): row + for row in strict_pool + if isinstance(row, dict) and row.get("phenotype_id") not in (None, "") + } + + blocked_candidate_reasons: Dict[str, str] = {} + preferred_pool_ids: List[str] = [] + blocked_pool_ids: List[str] = [] + for phenotype_id in strict_pool_ids: + row = strict_pool_by_id.get(str(phenotype_id)) or {} + block_reason = self._shortlist_candidate_block_reason( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + ) + if block_reason: + blocked_candidate_reasons[str(phenotype_id)] = block_reason + blocked_pool_ids.append(str(phenotype_id)) + else: + preferred_pool_ids.append(str(phenotype_id)) + + filtered_shortlist: List[str] = [] + dropped_ids: List[str] = [] + replaced_ids: List[str] = [] + plain_diagnosis_fill_skipped_ids: List[str] = [] + quality_threshold_skipped_ids: List[str] = [] + seen: set[str] = set() + for phenotype_id in shortlist_ids or []: + phenotype_id = str(phenotype_id) + if phenotype_id not in strict_pool_set: + if phenotype_id not in (None, ""): + dropped_ids.append(phenotype_id) + continue + if phenotype_id in 
blocked_candidate_reasons: + replaced_ids.append(phenotype_id) + continue + if phenotype_id not in seen: + filtered_shortlist.append(phenotype_id) + seen.add(phenotype_id) + + final_shortlist: List[str] = [] + for phenotype_id in preferred_pool_ids: + if phenotype_id not in filtered_shortlist or phenotype_id in final_shortlist: + continue + row = strict_pool_by_id.get(str(phenotype_id)) or {} + if not self._allow_plain_diagnosis_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + plain_diagnosis_fill_skipped_ids.append(str(phenotype_id)) + continue + if not self._allow_quality_threshold_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + quality_threshold_skipped_ids.append(str(phenotype_id)) + continue + final_shortlist.append(phenotype_id) + for phenotype_id in preferred_pool_ids: + if phenotype_id in final_shortlist: + continue + row = strict_pool_by_id.get(str(phenotype_id)) or {} + if not self._allow_plain_diagnosis_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + if str(phenotype_id) not in plain_diagnosis_fill_skipped_ids: + plain_diagnosis_fill_skipped_ids.append(str(phenotype_id)) + continue + if not self._allow_quality_threshold_fill( + row=row, + intent_facets=intent_facets, + study_intent=study_intent, + current_count=len(final_shortlist), + ): + if str(phenotype_id) not in quality_threshold_skipped_ids: + quality_threshold_skipped_ids.append(str(phenotype_id)) + continue + final_shortlist.append(phenotype_id) + if len(final_shortlist) >= target_count: + break + if not final_shortlist: + final_shortlist = preferred_pool_ids[:target_count] + + dedupe_diagnostics = { + "duplicate_topic_ids": [], + "backfilled_ids": [], + "applied": False, + } + if self._should_dedupe_shortlist(intent_facets=intent_facets, study_intent=study_intent): + 
final_shortlist, dedupe_diagnostics = self._dedupe_shortlist_ids( + shortlist_ids=final_shortlist, + candidate_rows_by_id=strict_pool_by_id, + backfill_ids=preferred_pool_ids, + target_count=target_count, + ) + + diagnostics = { + "strict_top_k": strict_top_k, + "strict_pool_ids": strict_pool_ids, + "planner_input_shortlist_ids": [str(pid) for pid in shortlist_ids or [] if pid not in (None, "")], + "dropped_ids": dropped_ids, + "replaced_ids": replaced_ids, + "blocked_pool_ids": blocked_pool_ids, + "blocked_candidate_reasons": blocked_candidate_reasons, + "preferred_pool_ids": preferred_pool_ids, + "plain_diagnosis_fill_skipped_ids": plain_diagnosis_fill_skipped_ids, + "quality_threshold_skipped_ids": quality_threshold_skipped_ids, + "duplicate_topic_ids": dedupe_diagnostics.get("duplicate_topic_ids") or [], + "dedupe_backfilled_ids": dedupe_diagnostics.get("backfilled_ids") or [], + "dedupe_applied": bool(dedupe_diagnostics.get("applied")), + "enforced_shortlist_ids": final_shortlist, + "enforced": final_shortlist != [str(pid) for pid in shortlist_ids or [] if pid not in (None, "")], + } + return final_shortlist, diagnostics + + def _candidate_metadata_priority( + self, + row: Dict[str, Any], + intent_facets: Dict[str, Any], + search_rank: int, + study_intent: str = "", + ) -> Dict[str, Any]: + topic_tokens = self._topic_tokens(intent_facets.get("condition_or_topic")) + alias_tokens_list = [ + (alias, self._topic_tokens(alias)) + for alias in (intent_facets.get("clinical_topic_aliases") or []) + if alias not in (None, "") + ] + role = self._flatten_text(row.get("phenotype_role")) + care_setting = self._flatten_text(intent_facets.get("care_setting")) + candidate_care_setting = self._flatten_text(row.get("care_setting_scope")) + primary_topic_tokens = self._topic_tokens(row.get("primary_clinical_topic")) + context_tokens = self._topic_tokens(row.get("target_vs_context_conditions")) + population_scope = self._flatten_text(row.get("population_scope")) + population_cue 
= self._flatten_text(intent_facets.get("population_cue")) + exclude_tags = self._flatten_text(row.get("exclude_from_primary_topic_match")) + source_dataset = self._flatten_text(row.get("source_dataset")) + signals_text = self._flatten_text(row.get("signals")) + name_text = self._flatten_text(row.get("name") or row.get("phenotype_name")) + short_description = self._flatten_text(row.get("short_description")) + recommendation_summary = self._flatten_text(row.get("recommendation_summary")) + retrieval_keywords = self._flatten_text(row.get("retrieval_keywords")) + combined_text = " ".join( + part for part in (name_text, short_description, recommendation_summary, signals_text, retrieval_keywords) if part + ) + procedure_focus_text = " ".join( + part for part in ( + name_text, + self._flatten_text(row.get("primary_clinical_topic")), + role, + ) if part + ) + reasons: List[Dict[str, Any]] = [] + + score = 0.0 + explicit_procedure_intent = self._is_explicit_procedure_intent(study_intent=study_intent, intent_facets=intent_facets) + + topic_score = self._topic_overlap_score(topic_tokens, primary_topic_tokens) + if topic_score: + delta = topic_score * 8.0 + score += delta + reasons.append({"kind": "topic_primary", "delta": round(delta, 4), "detail": row.get("primary_clinical_topic") or ""}) + context_score = self._topic_overlap_score(topic_tokens, context_tokens) + if context_score: + delta = context_score * 2.5 + score += delta + reasons.append({"kind": "topic_context", "delta": round(delta, 4), "detail": self._compact_text_value(row.get("target_vs_context_conditions"), limit=120)}) + + alias_primary_score, matched_primary_alias = self._best_alias_overlap(alias_tokens_list, primary_topic_tokens) + if alias_primary_score > topic_score and matched_primary_alias: + delta = alias_primary_score * 7.0 + score += delta + reasons.append({ + "kind": "dynamic_clinical_alias_match", + "delta": round(delta, 4), + "detail": {"alias": matched_primary_alias, "field": 
"primary_clinical_topic", "topic": row.get("primary_clinical_topic") or ""}, + }) + alias_context_score, matched_context_alias = self._best_alias_overlap(alias_tokens_list, context_tokens) + if alias_context_score > context_score and matched_context_alias: + delta = alias_context_score * 2.0 + score += delta + reasons.append({ + "kind": "dynamic_clinical_alias_context", + "delta": round(delta, 4), + "detail": {"alias": matched_context_alias, "field": "target_vs_context_conditions"}, + }) + + best_topic_score = max(topic_score, alias_primary_score) + best_context_score = max(context_score, alias_context_score) + if topic_tokens and best_topic_score <= 0.0 and best_context_score > 0.0: + score -= 3.0 + reasons.append({"kind": "context_without_primary", "delta": -3.0, "detail": "topic only matched context fields"}) + + intent_role = self._flatten_text(intent_facets.get("phenotype_role")) + if topic_tokens and best_topic_score <= 0.0 and best_context_score <= 0.0: + score -= 8.0 + reasons.append({"kind": "topic_mismatch", "delta": -8.0, "detail": row.get("primary_clinical_topic") or ""}) + if intent_role == "diagnosis": + if "diagnos" in role or role in {"condition", "case"}: + score += 4.0 + reasons.append({"kind": "role_match", "delta": 4.0, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("procedure", "surgery", "repair")): + score -= 4.5 + reasons.append({"kind": "role_penalty_procedure", "delta": -4.5, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("severity", "complication", "outcome", "screen", "risk_score")): + score -= 3.0 + reasons.append({"kind": "role_penalty_non_diagnosis", "delta": -3.0, "detail": row.get("phenotype_role") or ""}) + if any(token in role for token in ("covariate", "comorbid")): + score -= 3.5 + reasons.append({"kind": "role_penalty_covariate", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + if "visit" in role: + score -= 2.5 + reasons.append({"kind": 
"role_penalty_visit", "delta": -2.5, "detail": row.get("phenotype_role") or ""}) + if (not explicit_procedure_intent) and any(token in procedure_focus_text for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative")): + score -= 6.0 + reasons.append({"kind": "disease_vs_procedure_mismatch", "delta": -6.0, "detail": row.get("name") or row.get("primary_clinical_topic") or ""}) + if source_dataset == "ohdsi_phenotype_library" and any(token in procedure_focus_text for token in ("repair", "surgery", "surgical", "bypass", "post op", "post-op", "postoperative")): + score -= 2.0 + reasons.append({"kind": "native_ohdsi_cannot_override_procedure", "delta": -2.0, "detail": row.get("source_dataset") or ""}) + + if intent_role == "medication_based": + medication_text = any(token in combined_text for token in ("medication", "drug", "med codes", "insulin", "metformin", "antidiabetic", "meglitinide", "prescription", "therapy")) + medication_signal = "has_code_system:medication" in signals_text or medication_text + if "medication" in role or "drug" in role: + score += 8.0 + reasons.append({"kind": "role_match_medication", "delta": 8.0, "detail": row.get("phenotype_role") or ""}) + elif "diagnos" in role or role in {"condition", "case"}: + score -= 6.0 + reasons.append({"kind": "role_penalty_plain_diagnosis", "delta": -6.0, "detail": row.get("phenotype_role") or ""}) + elif any(token in role for token in ("covariate", "comorbid")): + score -= 3.5 + reasons.append({"kind": "role_penalty_covariate_for_medication", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + if medication_signal: + score += 4.5 + reasons.append({"kind": "medication_evidence", "delta": 4.5, "detail": row.get("name") or row.get("short_description") or ""}) + else: + score -= 4.0 + reasons.append({"kind": "missing_medication_evidence", "delta": -4.0, "detail": row.get("name") or row.get("short_description") or ""}) + if any(token in role for token in ("procedure", 
"screen", "severity", "outcome")): + score -= 3.5 + reasons.append({"kind": "role_penalty_non_medication", "delta": -3.5, "detail": row.get("phenotype_role") or ""}) + + if care_setting and care_setting != "any": + if candidate_care_setting and care_setting in candidate_care_setting: + score += 2.0 + reasons.append({"kind": "care_setting_match", "delta": 2.0, "detail": row.get("care_setting_scope") or ""}) + elif candidate_care_setting and candidate_care_setting not in {"any", "unspecified"}: + score -= 1.5 + reasons.append({"kind": "care_setting_penalty", "delta": -1.5, "detail": row.get("care_setting_scope") or ""}) + + if population_cue and population_scope: + if "veteran" in population_cue and "veteran" in population_scope: + score += 1.0 + reasons.append({"kind": "population_match_veteran", "delta": 1.0, "detail": row.get("population_scope") or ""}) + if "va" in population_cue and "va" in population_scope: + score += 1.0 + reasons.append({"kind": "population_match_va", "delta": 1.0, "detail": row.get("population_scope") or ""}) + if "va" in population_cue and "va_cipher" in source_dataset: + score += 0.75 + reasons.append({"kind": "source_match_va", "delta": 0.75, "detail": row.get("source_dataset") or ""}) + + if "context" in exclude_tags: + score -= 2.0 + reasons.append({"kind": "exclude_context", "delta": -2.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if "comorbid" in exclude_tags or "covariate" in exclude_tags: + score -= 3.0 + reasons.append({"kind": "exclude_comorbidity", "delta": -3.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if any(token in exclude_tags for token in ("procedure", "surgery", "post-op", "postop")): + score -= 4.0 + reasons.append({"kind": "exclude_procedure", "delta": -4.0, "detail": row.get("exclude_from_primary_topic_match") or []}) + if any(token in exclude_tags for token in ("severity", "complication", "outcome", "screen")): + score -= 2.5 + reasons.append({"kind": 
"exclude_non_diagnosis", "delta": -2.5, "detail": row.get("exclude_from_primary_topic_match") or []}) + + if "withdrawn" in signals_text or "[w]" in name_text: + score -= 12.0 + reasons.append({"kind": "status_withdrawn", "delta": -12.0, "detail": row.get("signals") or row.get("name") or ""}) + if "prediction" in signals_text or "prediction" in name_text: + score -= 4.0 + reasons.append({"kind": "status_prediction", "delta": -4.0, "detail": row.get("signals") or row.get("name") or ""}) + if "screening" in role or "screening" in name_text: + score -= 2.5 + reasons.append({"kind": "screening_penalty", "delta": -2.5, "detail": row.get("name") or row.get("phenotype_role") or ""}) + + readiness_delta = float(row.get("execution_readiness_score") or 0.0) * 0.25 + score += readiness_delta + reasons.append({"kind": "execution_readiness", "delta": round(readiness_delta, 4), "detail": row.get("execution_readiness_score")}) + rank_delta = max(0.0, 5.0 - float(search_rank)) * 0.02 + score += rank_delta + reasons.append({"kind": "search_rank_tiebreak", "delta": round(rank_delta, 4), "detail": search_rank}) + + return { + "metadata_score": score, + "retrieval_score": float(row.get("score") or 0.0), + "reasons": reasons, + } + + def _rerank_planning_candidates( + self, + candidates: List[Dict[str, Any]], + intent_facets: Dict[str, Any], + study_intent: str = "", + ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + ranked_rows: List[tuple[float, float, int, Dict[str, Any], Dict[str, Any]]] = [] + for index, row in enumerate(candidates): + if not isinstance(row, dict): + continue + priority = self._candidate_metadata_priority( + row=row, + intent_facets=intent_facets, + search_rank=index, + study_intent=study_intent, + ) + metadata_score = float(priority.get("metadata_score") or 0.0) + retrieval_score = float(priority.get("retrieval_score") or 0.0) + ranked_rows.append((metadata_score, retrieval_score, -index, row, priority)) + ranked_rows.sort(reverse=True) + 
ranked_candidates: List[Dict[str, Any]] = [] + rerank_diagnostics: List[Dict[str, Any]] = [] + for rank_index, (metadata_score, retrieval_score, original_position, row, priority) in enumerate(ranked_rows, start=1): + ranked_candidates.append(row) + rerank_diagnostics.append( + { + "rank": rank_index, + "original_rank": (-original_position) + 1, + "phenotype_id": row.get("phenotype_id"), + "name": row.get("name") or row.get("phenotype_name") or "", + "metadata_score": round(metadata_score, 4), + "retrieval_score": round(retrieval_score, 4), + "phenotype_role": row.get("phenotype_role") or "", + "primary_clinical_topic": row.get("primary_clinical_topic") or "", + "care_setting_scope": row.get("care_setting_scope") or "", + "exclude_from_primary_topic_match": row.get("exclude_from_primary_topic_match") or [], + "reasons": priority.get("reasons") or [], + } + ) + return ranked_candidates, rerank_diagnostics + + def _validate_final_recommendation_payload( + self, + llm_payload: Optional[Dict[str, Any]], + catalog_rows: List[Dict[str, Any]], + ) -> tuple[Optional[Dict[str, Any]], Dict[str, Any]]: + diagnostics: Dict[str, Any] = { + "rejected": False, + "reason": None, + "invalid_ids": [], + "duplicate_ids": [], + "allowed_ids": [row.get("phenotype_id") for row in catalog_rows if row.get("phenotype_id")], + } + if not isinstance(llm_payload, dict): + return llm_payload, diagnostics + + raw_recs = llm_payload.get("phenotype_recommendations") + if not isinstance(raw_recs, list): + diagnostics["rejected"] = True + diagnostics["reason"] = "missing_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + if not raw_recs: + diagnostics["rejected"] = True + diagnostics["reason"] = "empty_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + allowed_set = set(diagnostics["allowed_ids"]) + seen: set[str] = set() + invalid_ids: List[str] = [] + duplicate_ids: List[str] = [] 
+ valid_unique = 0 + + for rec in raw_recs: + if not isinstance(rec, dict): + continue + phenotype_id = rec.get("phenotype_id") + if phenotype_id in (None, ""): + continue + phenotype_id = str(phenotype_id) + if phenotype_id not in allowed_set: + invalid_ids.append(phenotype_id) + continue + if phenotype_id in seen: + duplicate_ids.append(phenotype_id) + continue + seen.add(phenotype_id) + valid_unique += 1 + + diagnostics["invalid_ids"] = sorted(set(invalid_ids)) + diagnostics["duplicate_ids"] = sorted(set(duplicate_ids)) + diagnostics["valid_unique_count"] = valid_unique + if diagnostics["invalid_ids"] or diagnostics["duplicate_ids"] or valid_unique <= 0: + diagnostics["rejected"] = True + if diagnostics["invalid_ids"]: + diagnostics["reason"] = "invalid_ids" + elif diagnostics["duplicate_ids"]: + diagnostics["reason"] = "duplicate_ids" + else: + diagnostics["reason"] = "no_valid_recommendations" + return {"plan": llm_payload.get("plan"), "phenotype_recommendations": []}, diagnostics + + return llm_payload, diagnostics + + def _build_compact_final_candidates(self, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + compact_rows: List[Dict[str, Any]] = [] + for row in candidates or []: + if not isinstance(row, dict): + continue + compact_rows.append( + { + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), + "name": row.get("name") or row.get("phenotype_name") or "", + "short_description": row.get("short_description") or "", + "primary_clinical_topic": row.get("primary_clinical_topic") or "", + "phenotype_role": row.get("phenotype_role") or "", + "care_setting_scope": row.get("care_setting_scope") or "", + "population_scope": row.get("population_scope") or "", + "recommendation_summary": row.get("recommendation_summary") or "", + "executable_definition_status": row.get("executable_definition_status") or "", + "execution_readiness_score": row.get("execution_readiness_score"), + "score": row.get("score"), + } + ) + 
return compact_rows + + def _default_final_recommendation_plan(self, study_intent: str) -> str: + return "Rank phenotypes matching the study intent." + + def _default_final_recommendation_justification(self, row: Dict[str, Any]) -> str: + phenotype_role = self._flatten_text(row.get("phenotype_role")).replace("_", " ") or "phenotype" + name = row.get("phenotype_name") or row.get("name") or "selected phenotype" + justification = f"Selected from the top reranked shortlisted candidates as a clinically aligned {phenotype_role} match." + if len(justification) > 200: + return "Selected from the top reranked shortlisted candidates as a clinically aligned match." + return justification + + def _build_deterministic_final_payload( + self, + llm_payload: Optional[Dict[str, Any]], + catalog_rows: List[Dict[str, Any]], + max_results: int, + study_intent: str, + ) -> tuple[Dict[str, Any], Dict[str, Any]]: + selected_rows = [row for row in catalog_rows[: max(0, max_results)] if isinstance(row, dict)] + selected_ids = [str(row.get("phenotype_id")) for row in selected_rows if row.get("phenotype_id") not in (None, "")] + selected_set = set(selected_ids) + explanation_by_id: Dict[str, Dict[str, Any]] = {} + duplicate_ids: List[str] = [] + invalid_ids: List[str] = [] + + if isinstance(llm_payload, dict): + raw_recs = llm_payload.get("phenotype_recommendations") + if isinstance(raw_recs, list): + for rec in raw_recs: + if not isinstance(rec, dict): + continue + phenotype_id = rec.get("phenotype_id") + if phenotype_id in (None, ""): + continue + phenotype_id = str(phenotype_id) + if phenotype_id not in selected_set: + invalid_ids.append(phenotype_id) + continue + if phenotype_id in explanation_by_id: + duplicate_ids.append(phenotype_id) + continue + explanation_by_id[phenotype_id] = rec + + recommendations: List[Dict[str, Any]] = [] + matched_ids: List[str] = [] + defaulted_ids: List[str] = [] + for row in selected_rows: + phenotype_id = str(row.get("phenotype_id") or "") + if not 
phenotype_id: + continue + llm_rec = explanation_by_id.get(phenotype_id) or {} + justification = llm_rec.get("justification") if isinstance(llm_rec.get("justification"), str) else "" + confidence = llm_rec.get("confidence") + if not justification.strip(): + justification = self._default_final_recommendation_justification(row) + defaulted_ids.append(phenotype_id) + else: + matched_ids.append(phenotype_id) + if not isinstance(confidence, (int, float)): + confidence = None + recommendations.append( + { + "phenotype_id": phenotype_id, + "phenotype_name": row.get("phenotype_name") or row.get("name") or "", + "justification": justification[:200], + "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, + } + ) + + plan = "" + if isinstance(llm_payload, dict) and isinstance(llm_payload.get("plan"), str): + plan = llm_payload.get("plan") or "" + if not plan.strip(): + plan = self._default_final_recommendation_plan(study_intent) + + payload = { + "plan": plan[:300], + "phenotype_recommendations": recommendations, + } + diagnostics = { + "selected_ids": selected_ids, + "matched_llm_ids": matched_ids, + "defaulted_ids": defaulted_ids, + "invalid_llm_ids": sorted(set(invalid_ids)), + "duplicate_llm_ids": sorted(set(duplicate_ids)), + "used_llm_justification_count": len(matched_ids), + "used_default_justification_count": len(defaulted_ids), + } + return payload, diagnostics + diff --git a/acp_agent/study_agent_acp/server.py b/acp_agent/study_agent_acp/server.py index d2b422e..2077faf 100644 --- a/acp_agent/study_agent_acp/server.py +++ b/acp_agent/study_agent_acp/server.py @@ -14,6 +14,7 @@ SERVICES = [ {"name": "phenotype_recommendation", "endpoint": "/flows/phenotype_recommendation"}, + {"name": "phenotype_definition", "endpoint": "/flows/phenotype_definition"}, {"name": "phenotype_improvements", "endpoint": "/flows/phenotype_improvements"}, {"name": "concept_sets_review", "endpoint": "/flows/concept_sets_review"}, {"name": 
"cohort_critique_general_design", "endpoint": "/flows/cohort_critique_general_design"}, @@ -23,6 +24,8 @@ {"name": "keeper_profiles_generate", "endpoint": "/flows/keeper_profiles_generate"}, {"name": "phenotype_recommendation_advice", "endpoint": "/flows/phenotype_recommendation_advice"}, {"name": "phenotype_intent_split", "endpoint": "/flows/phenotype_intent_split"}, + {"name": "cohort_methods_intent_split", "endpoint": "/flows/cohort_methods_intent_split"}, + {"name": "cohort_methods_specifications_recommendation", "endpoint": "/flows/cohort_methods_specifications_recommendation"}, ] SERVICE_REGISTRY_PATH = os.getenv("STUDY_AGENT_SERVICE_REGISTRY", "docs/SERVICE_REGISTRY.yaml") logger = logging.getLogger("study_agent.acp") @@ -248,6 +251,24 @@ def do_POST(self) -> None: _write_json(self, status, result) return + if self.path == "/flows/phenotype_definition": + try: + body = _read_json(self) + except Exception as exc: + _write_json(self, 400, {"error": f"invalid_json: {exc}"}) + return + phenotype_id = body.get("phenotype_id") or "" + try: + result = self.agent.run_phenotype_definition_flow(phenotype_id=phenotype_id) + except Exception as exc: + if self.debug: + logger.exception("flow_failed name=phenotype_definition") + _write_json(self, 500, {"error": "flow_failed", "detail": str(exc) if self.debug else None}) + return + status = 200 if result.get("status") != "error" else 500 + _write_json(self, status, result) + return + if self.path == "/flows/phenotype_recommendation": try: body = _read_json(self) @@ -280,6 +301,31 @@ def do_POST(self) -> None: _write_json(self, status, result) return + if self.path == "/flows/cohort_methods_specifications_recommendation": + try: + body = _read_json(self) + except Exception as exc: + _write_json(self, 400, {"error": f"invalid_json: {exc}"}) + return + try: + from study_agent_core.models import CohortMethodSpecsRecommendationInput + payload = CohortMethodSpecsRecommendationInput(**body) + except Exception as exc: + 
_write_json(self, 422, {"error": f"invalid_payload: {exc}"}) + return + try: + result = self.agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description=payload.analytic_settings_description, + study_intent=payload.study_intent or "", + ) + except Exception as exc: + if self.debug: + logger.exception("flow_failed name=cohort_methods_specifications_recommendation") + _write_json(self, 500, {"error": "flow_failed", "detail": str(exc) if self.debug else None}) + return + _write_json(self, 200, result) + return + if self.path == "/flows/phenotype_improvements": try: body = _read_json(self) @@ -558,6 +604,26 @@ def do_POST(self) -> None: _write_json(self, status, result) return + if self.path == "/flows/cohort_methods_intent_split": + try: + body = _read_json(self) + except Exception as exc: + _write_json(self, 400, {"error": f"invalid_json: {exc}"}) + return + study_intent = body.get("study_intent") or body.get("query") or "" + try: + result = self.agent.run_cohort_methods_intent_split_flow( + study_intent=study_intent, + ) + except Exception as exc: + if self.debug: + logger.exception("flow_failed name=cohort_methods_intent_split") + _write_json(self, 500, {"error": "flow_failed", "detail": str(exc) if self.debug else None}) + return + status = 200 if result.get("status") != "error" else 500 + _write_json(self, status, result) + return + _write_json(self, 404, {"error": "not_found"}) diff --git a/core/study_agent_core/__init__.py b/core/study_agent_core/__init__.py index 8a834b6..244d5af 100644 --- a/core/study_agent_core/__init__.py +++ b/core/study_agent_core/__init__.py @@ -1,4 +1,5 @@ from .tools import ( + cohort_methods_intent_split, cohort_lint, phenotype_improvements, phenotype_intent_split, @@ -9,6 +10,7 @@ ) __all__ = [ + "cohort_methods_intent_split", "cohort_lint", "phenotype_improvements", "phenotype_intent_split", diff --git a/core/study_agent_core/cohort_methods_spec_validation.py 
"""Pure validation, merge, and backfill helpers for cohort-method specs.

No IO. No network. Rule updates live in the section checkers only.
"""
from __future__ import annotations

from copy import deepcopy
from typing import Any, Dict, List, Tuple


# Keys that must be present at the top level of a complete spec.
COHORT_METHODS_SPEC_TOP_LEVEL_KEYS: List[str] = [
    "description",
    "getDbCohortMethodDataArgs",
    "createStudyPopArgs",
    "trimByPsArgs",
    "matchOnPsArgs",
    "stratifyByPsArgs",
    "createPsArgs",
    "fitOutcomeModelArgs",
]

# Sections the LLM fills in and that `validate_section` knows how to check.
LLM_FILLED_SECTIONS: List[str] = [
    "getDbCohortMethodDataArgs",
    "createStudyPopArgs",
    "propensityScoreAdjustment",
    "fitOutcomeModelArgs",
]

_REMOVE_DUP = {"keep all", "keep first", "remove all", "keep first, truncate to second"}
_ANCHOR = {"cohort start", "cohort end"}
_CALIPER_SCALE = {"propensity score", "standardized", "standardized logit"}
_BASE_SELECTION = {"all", "target", "comparator"}
_CV_TYPE = {"auto", "grid"}
_NOISE_LEVEL = {"silent", "quiet", "noisy"}
_MODEL_TYPE = {"logistic", "poisson", "cox"}

_NUMBER = (int, float)


def validate_cohort_methods_spec(spec: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Check top-level structural completeness.

    Returns (ok, missing_keys). Does not descend into section contents.
    A non-dict `spec` is reported as missing every required key.
    """
    if not isinstance(spec, dict):
        return False, list(COHORT_METHODS_SPEC_TOP_LEVEL_KEYS)
    missing = [k for k in COHORT_METHODS_SPEC_TOP_LEVEL_KEYS if k not in spec]
    return (not missing, missing)


def validate_section(section_name: str, value: Any) -> Tuple[bool, List[str]]:
    """Check enum values and numeric ranges for a single LLM-filled section.

    Returns (ok, violations) where violations is a list of human-readable strings.
    An unknown `section_name` yields a single "unknown section" violation.
    """
    if section_name not in LLM_FILLED_SECTIONS:
        return False, [f"unknown section: {section_name}"]
    checker = _SECTION_CHECKERS[section_name]
    violations: List[str] = []
    checker(value, violations)
    return (not violations, violations)


def _require_object(section_name: str, value: Any, violations: List[str]) -> bool:
    """Return True when `value` is a dict; otherwise record a violation."""
    if isinstance(value, dict):
        return True
    violations.append(f"{section_name} must be an object")
    return False


def _check_control(control: Any, violations: List[str]) -> None:
    """Check the `cvType` / `noiseLevel` enums of a `control` object, if it is one."""
    if not isinstance(control, dict):
        return
    cv = control.get("cvType")
    if cv is not None and cv not in _CV_TYPE:
        violations.append(f"control.cvType must be one of {sorted(_CV_TYPE)}")
    noise = control.get("noiseLevel")
    if noise is not None and noise not in _NOISE_LEVEL:
        violations.append(f"control.noiseLevel must be one of {sorted(_NOISE_LEVEL)}")


def _check_get_db_args(value: Any, violations: List[str]) -> None:
    """Range/enum checks for `getDbCohortMethodDataArgs`."""
    if not _require_object("getDbCohortMethodDataArgs", value, violations):
        return
    max_size = value.get("maxCohortSize")
    if isinstance(max_size, _NUMBER) and max_size < 0:
        violations.append("maxCohortSize must be >= 0")
    washout = value.get("washoutPeriod")
    if isinstance(washout, _NUMBER) and washout < 0:
        violations.append("washoutPeriod must be >= 0")
    dup = value.get("removeDuplicateSubjects")
    if dup is not None and dup not in _REMOVE_DUP:
        violations.append(f"removeDuplicateSubjects must be one of {sorted(_REMOVE_DUP)}")
    periods = value.get("studyPeriods")
    if periods is not None and not isinstance(periods, list):
        violations.append("studyPeriods must be a list")


def _check_study_pop(value: Any, violations: List[str]) -> None:
    """Range/enum checks for `createStudyPopArgs`, including any `timeAtRisks` list."""
    if not _require_object("createStudyPopArgs", value, violations):
        return
    # Both spellings of the lookback key are accepted.
    lookback = value.get("priorOutcomeLookback", value.get("priorOutcomeLookBack"))
    if isinstance(lookback, _NUMBER) and lookback < 0:
        violations.append("priorOutcomeLookback must be >= 0")
    # NOTE(review): the section-level floor is 0 while each timeAtRisks entry
    # below requires >= 1 -- confirm which bound is intended.
    min_days = value.get("minDaysAtRisk")
    if isinstance(min_days, _NUMBER) and min_days < 0:
        violations.append("minDaysAtRisk must be >= 0")
    start = value.get("startAnchor")
    end = value.get("endAnchor")
    if start is not None and start not in _ANCHOR:
        violations.append(f"startAnchor must be one of {sorted(_ANCHOR)}")
    if end is not None and end not in _ANCHOR:
        violations.append(f"endAnchor must be one of {sorted(_ANCHOR)}")

    tars = value.get("timeAtRisks")
    if tars is None:
        return
    if not isinstance(tars, list):
        violations.append("timeAtRisks must be a list")
        return
    for idx, tar in enumerate(tars):
        if not isinstance(tar, dict):
            violations.append(f"timeAtRisks[{idx}] must be an object")
            continue
        start = tar.get("startAnchor")
        end = tar.get("endAnchor")
        if start is not None and start not in _ANCHOR:
            violations.append(f"timeAtRisks[{idx}].startAnchor must be one of {sorted(_ANCHOR)}")
        if end is not None and end not in _ANCHOR:
            violations.append(f"timeAtRisks[{idx}].endAnchor must be one of {sorted(_ANCHOR)}")
        min_days = tar.get("minDaysAtRisk")
        if isinstance(min_days, _NUMBER) and min_days < 1:
            violations.append(f"timeAtRisks[{idx}].minDaysAtRisk must be >= 1")


def _check_trim_by_ps(value: Any, violations: List[str]) -> None:
    """Range checks for `trimByPsArgs`; `None` means "not used" and passes."""
    if value is None:
        return
    if not _require_object("trimByPsArgs", value, violations):
        return
    trim = value.get("trimFraction")
    if isinstance(trim, _NUMBER) and (trim < 0 or trim > 1):
        violations.append("trimFraction must be between 0 and 1")
    bounds = value.get("equipoiseBounds")
    if bounds is None:
        return
    if not isinstance(bounds, list) or len(bounds) != 2:
        violations.append("equipoiseBounds must be a two-item list or null")
    elif all(isinstance(x, _NUMBER) for x in bounds):
        if bounds[0] < 0 or bounds[1] > 1 or bounds[0] >= bounds[1]:
            violations.append("equipoiseBounds must be ordered values between 0 and 1")


def _check_match_on_ps(value: Any, violations: List[str]) -> None:
    """Range/enum checks for `matchOnPsArgs`; `None` passes."""
    if value is None:
        return
    if not _require_object("matchOnPsArgs", value, violations):
        return
    ratio = value.get("maxRatio")
    if isinstance(ratio, _NUMBER) and ratio < 0:
        violations.append("maxRatio must be >= 0")
    cal = value.get("caliper")
    if isinstance(cal, _NUMBER) and cal < 0:
        violations.append("caliper must be >= 0")
    scale = value.get("caliperScale")
    if scale is not None and scale not in _CALIPER_SCALE:
        violations.append(f"caliperScale must be one of {sorted(_CALIPER_SCALE)}")


def _check_stratify_by_ps(value: Any, violations: List[str]) -> None:
    """Range/enum checks for `stratifyByPsArgs`; `None` passes."""
    if value is None:
        return
    if not _require_object("stratifyByPsArgs", value, violations):
        return
    strata = value.get("numberOfStrata")
    if isinstance(strata, _NUMBER) and strata < 1:
        violations.append("numberOfStrata must be >= 1")
    base = value.get("baseSelection")
    if base is not None and base not in _BASE_SELECTION:
        violations.append(f"baseSelection must be one of {sorted(_BASE_SELECTION)}")


def _check_create_ps(value: Any, violations: List[str]) -> None:
    """Range/enum checks for `createPsArgs` and its `control` object; `None` passes."""
    if value is None:
        return
    if not _require_object("createPsArgs", value, violations):
        return
    max_fit = value.get("maxCohortSizeForFitting")
    if isinstance(max_fit, _NUMBER) and max_fit < 0:
        violations.append("maxCohortSizeForFitting must be >= 0")
    _check_control(value.get("control"), violations)


def _check_ps_adjustment(value: Any, violations: List[str]) -> None:
    """Checks for the `propensityScoreAdjustment` section.

    Validates the four nested argument objects and, when present, each entry
    of `psSettings`. `createPsArgs.control` is checked exactly once (via
    `_check_create_ps`) so a single bad value yields a single violation
    whether or not `psSettings` is supplied.
    """
    if value is None:
        value = {}
    if not isinstance(value, dict):
        violations.append("propensityScoreAdjustment must be an object or absent")
        return
    _check_trim_by_ps(value.get("trimByPsArgs"), violations)
    _check_match_on_ps(value.get("matchOnPsArgs"), violations)
    _check_stratify_by_ps(value.get("stratifyByPsArgs"), violations)
    _check_create_ps(value.get("createPsArgs"), violations)

    settings = value.get("psSettings")
    if settings is None:
        return
    if not isinstance(settings, list):
        violations.append("psSettings must be a list")
        return
    for idx, ps in enumerate(settings):
        if not isinstance(ps, dict):
            violations.append(f"psSettings[{idx}] must be an object")
            continue
        match = ps.get("matchOnPsArgs")
        if isinstance(match, dict):
            ratio = match.get("maxRatio")
            if isinstance(ratio, _NUMBER) and ratio < 0:
                violations.append(f"psSettings[{idx}].matchOnPsArgs.maxRatio must be >= 0")
            cal = match.get("caliper")
            if isinstance(cal, _NUMBER) and cal < 0:
                violations.append(f"psSettings[{idx}].matchOnPsArgs.caliper must be >= 0")
            scale = match.get("caliperScale")
            if scale is not None and scale not in _CALIPER_SCALE:
                violations.append(
                    f"psSettings[{idx}].matchOnPsArgs.caliperScale must be one of {sorted(_CALIPER_SCALE)}"
                )
        strat = ps.get("stratifyByPsArgs")
        if isinstance(strat, dict):
            # NOTE(review): the floor here is 2 while the section-level
            # stratifyByPsArgs check uses 1 -- confirm which is intended.
            strata = strat.get("numberOfStrata")
            if isinstance(strata, _NUMBER) and strata < 2:
                violations.append(f"psSettings[{idx}].stratifyByPsArgs.numberOfStrata must be >= 2")
            base = strat.get("baseSelection")
            if base is not None and base not in _BASE_SELECTION:
                violations.append(
                    f"psSettings[{idx}].stratifyByPsArgs.baseSelection must be one of {sorted(_BASE_SELECTION)}"
                )


def _check_outcome_model(value: Any, violations: List[str]) -> None:
    """Enum checks for `fitOutcomeModelArgs` and its `control` object."""
    if not _require_object("fitOutcomeModelArgs", value, violations):
        return
    model_type = value.get("modelType")
    if model_type is not None and model_type not in _MODEL_TYPE:
        violations.append(f"modelType must be one of {sorted(_MODEL_TYPE)}")
    _check_control(value.get("control"), violations)


# Dispatch table used by `validate_section`; keys mirror LLM_FILLED_SECTIONS.
_SECTION_CHECKERS = {
    "getDbCohortMethodDataArgs": _check_get_db_args,
    "createStudyPopArgs": _check_study_pop,
    "propensityScoreAdjustment": _check_ps_adjustment,
    "fitOutcomeModelArgs": _check_outcome_model,
}


def merge_client_metadata(
    spec: Dict[str, Any],
    cohort_definitions: Dict[str, Any],
    negative_control: Dict[str, Any],
    covariate_selection: Dict[str, Any],
) -> Dict[str, Any]:
    """Return a deep copy of `spec` with client-carried metadata fields overwritten.

    Overwrites `cohortDefinitions`, `negativeControlConceptSet`, `covariateSelection`
    -- each only when the corresponding argument is non-empty.
    Leaves `name` alone (LLM-supplied). A non-dict `spec` is treated as empty.
    """
    merged = deepcopy(spec) if isinstance(spec, dict) else {}
    if cohort_definitions:
        merged["cohortDefinitions"] = deepcopy(cohort_definitions)
    if negative_control:
        merged["negativeControlConceptSet"] = deepcopy(negative_control)
    if covariate_selection:
        merged["covariateSelection"] = deepcopy(covariate_selection)
    return merged


def backfill_section_from_defaults(
    spec: Dict[str, Any],
    defaults: Dict[str, Any],
    section_name: str,
) -> Dict[str, Any]:
    """Return a deep copy of `spec` with `section_name` replaced by the defaults value.

    When `defaults` is missing, not a dict, or lacks the section, the section
    is set to an empty object.

    Raises ValueError for sections outside LLM_FILLED_SECTIONS.
    """
    if section_name not in LLM_FILLED_SECTIONS:
        raise ValueError(f"cannot backfill unknown section: {section_name}")
    out = deepcopy(spec) if isinstance(spec, dict) else {}
    source = defaults if isinstance(defaults, dict) else {}
    out[section_name] = deepcopy(source.get(section_name, {}))
    return out


# createStudyPopArgs keys that belong to the time-at-risk part of the shell shape.
_TAR_KEYS: Tuple[str, ...] = ("startAnchor", "riskWindowStart", "endAnchor", "riskWindowEnd")

_PS_ARG_KEYS: Tuple[str, ...] = ("trimByPsArgs", "matchOnPsArgs", "stratifyByPsArgs", "createPsArgs")


def cohort_methods_spec_to_shell_recommendation(
    *,
    cohort_methods_spec: Dict[str, Any],
    raw_description: str,
    defaults_snapshot: Dict[str, Any],
    profile_name: str,
    input_method: str,
    rec_status: str,
) -> Dict[str, Any]:
    """Project a validated cohort-method spec into the 4-key recommendation shape the
    cohort-methods R shell expects.

    `createStudyPopArgs` is split into `time_at_risk` (the `_TAR_KEYS`) and
    `study_population` (everything else, plus `cohortMethodDataArgs` when
    `getDbCohortMethodDataArgs` is non-empty). Propensity-score settings come
    from `propensityScoreAdjustment` when that key exists, otherwise from the
    four flat top-level `*PsArgs` keys (absent ones become `None`).

    See docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md §6.
    """
    spec = cohort_methods_spec if isinstance(cohort_methods_spec, dict) else {}

    def _section(name: str) -> Dict[str, Any]:
        # Tolerate a malformed (non-object) section rather than failing on .items().
        section = spec.get(name)
        return section if isinstance(section, dict) else {}

    cspa = _section("createStudyPopArgs")
    cmda = _section("getDbCohortMethodDataArgs")
    fmod = spec.get("fitOutcomeModelArgs") or {}
    if "propensityScoreAdjustment" in spec:
        psadj = spec.get("propensityScoreAdjustment") or {}
    else:
        psadj = {key: spec.get(key) for key in _PS_ARG_KEYS}

    study_population: Dict[str, Any] = {
        k: deepcopy(v) for k, v in cspa.items() if k not in _TAR_KEYS
    }
    if cmda:
        study_population["cohortMethodDataArgs"] = deepcopy(cmda)

    time_at_risk: Dict[str, Any] = {
        k: deepcopy(cspa[k]) for k in _TAR_KEYS if k in cspa
    }

    return {
        "mode": "free_text",
        "input_method": input_method,
        "source": "acp_flow",
        "status": rec_status,
        "profile_name": profile_name,
        "raw_description": raw_description,
        "study_population": study_population,
        "time_at_risk": time_at_risk,
        "propensity_score_adjustment": deepcopy(psadj),
        "outcome_model": deepcopy(fmod),
        "deferred_inputs": {
            "function_argument_description": "implemented",
            "description_file_path": "implemented",
            "interactive_typed_description": "implemented",
        },
        "defaults_snapshot": deepcopy(defaults_snapshot or {}),
    }
PhenotypeIntentSplitOutput(BaseModel): mode: str +class CohortMethodsIntentSplitOutput(BaseModel): + status: Literal["ok", "needs_clarification"] + plan: str + target_statement: str + comparator_statement: str + outcome_statement: str + outcome_statements: List[str] = Field(default_factory=list) + rationale: str + questions: List[str] = Field(default_factory=list) + mode: str + + class PhenotypeValidationReviewOutput(BaseModel): label: str rationale: str @@ -256,5 +290,25 @@ class CaseCausalReviewOutput(BaseModel): diagnostics: Dict[str, Any] = Field(default_factory=dict) +class CohortMethodSpecsRecommendationInput(BaseModel): + model_config = ConfigDict(extra="forbid") + + analytic_settings_description: str + study_intent: Optional[str] = "" + study_description: Optional[str] = None + llm_result: Optional[Dict[str, Any]] = None + + +CohortMethodSpecsStatus = Literal["ok", "llm_parse_error", "schema_validation_error"] + + +class CohortMethodSpecsRecommendationOutput(BaseModel): + status: CohortMethodSpecsStatus + recommendation: Dict[str, Any] = Field(default_factory=dict) + cohort_methods_specifications: Optional[Dict[str, Any]] = None + section_rationales: Dict[str, Dict[str, Any]] = Field(default_factory=dict) + diagnostics: Dict[str, Any] = Field(default_factory=dict) + + class LLMAuditEnvelope(BaseModel): records: List[LLMAuditRecord] = Field(default_factory=list) diff --git a/core/study_agent_core/tools.py b/core/study_agent_core/tools.py index 5c1f908..e0179ca 100644 --- a/core/study_agent_core/tools.py +++ b/core/study_agent_core/tools.py @@ -2,6 +2,8 @@ from typing import Any, Dict, List, Optional, Tuple from .models import ( + CohortMethodsIntentSplitInput, + CohortMethodsIntentSplitOutput, CohortLintInput, CohortLintOutput, ConceptSetDiffInput, @@ -12,6 +14,8 @@ PhenotypeIntentSplitOutput, PhenotypeRecommendationAdviceInput, PhenotypeRecommendationAdviceOutput, + PhenotypeRecommendationPlanInput, + PhenotypeRecommendationPlanOutput, 
PhenotypeValidationReviewInput, PhenotypeValidationReviewOutput, PhenotypeRecommendationsInput, @@ -96,31 +100,88 @@ def apply_set_include_descendants( return cs_copy, preview +def _build_allowed_id_maps(catalog_rows: List[Dict[str, Any]]) -> tuple[Dict[str, Dict[str, Any]], Dict[str, List[str]]]: + allowed: Dict[str, Dict[str, Any]] = {} + suffix_map: Dict[str, List[str]] = {} + for row in catalog_rows or []: + phenotype_id = row.get("phenotype_id") + if phenotype_id in (None, ""): + continue + phenotype_id = str(phenotype_id) + allowed[phenotype_id] = row + if ":" in phenotype_id: + suffix = phenotype_id.rsplit(":", 1)[-1] + suffix_map.setdefault(suffix, []).append(phenotype_id) + return allowed, suffix_map + + +def _resolve_catalog_phenotype_id( + phenotype_id: Any, + allowed: Dict[str, Dict[str, Any]], + suffix_map: Dict[str, List[str]], +) -> Optional[str]: + if phenotype_id in (None, ""): + return None + text = str(phenotype_id).strip() + if not text: + return None + if text in allowed: + return text + suffix_hits = suffix_map.get(text) or [] + if len(suffix_hits) == 1: + return suffix_hits[0] + return None + + def _filter_catalog_recs( recs: List[Dict[str, Any]], catalog_rows: List[Dict[str, Any]], max_results: int, ) -> List[Dict[str, Any]]: - allowed = {r.get("cohortId"): r for r in catalog_rows if r.get("cohortId") is not None} + allowed, suffix_map = _build_allowed_id_maps(catalog_rows) cleaned = [] + seen: set[str] = set() for rec in recs or []: - cid = rec.get("cohortId") - if cid not in allowed: + phenotype_id = _resolve_catalog_phenotype_id(rec.get("phenotype_id"), allowed, suffix_map) + if phenotype_id is None or phenotype_id in seen: continue - info = allowed[cid] + info = allowed[phenotype_id] cleaned.append( { - "cohortId": cid, - "cohortName": rec.get("cohortName") or info.get("cohortName") or "", + "phenotype_id": phenotype_id, + "phenotype_name": rec.get("phenotype_name") or info.get("phenotype_name") or info.get("name") or "", 
"justification": rec.get("justification") or "Model justification not provided.", "confidence": rec.get("confidence"), } ) + seen.add(phenotype_id) if len(cleaned) >= max_results: break return cleaned +def _filter_shortlist_ids( + shortlist_ids: List[Any], + catalog_rows: List[Dict[str, Any]], + max_shortlist: int, +) -> tuple[List[str], List[str]]: + allowed, suffix_map = _build_allowed_id_maps(catalog_rows) + cleaned: List[str] = [] + invalid_ids = [] + for phenotype_id in shortlist_ids or []: + resolved = _resolve_catalog_phenotype_id(phenotype_id, allowed, suffix_map) + if resolved is None: + if phenotype_id not in (None, ""): + invalid_ids.append(str(phenotype_id)) + continue + if resolved in cleaned: + continue + cleaned.append(resolved) + if len(cleaned) >= max_shortlist: + break + return cleaned, sorted(set(invalid_ids)) + + def propose_concept_set_diff( concept_set: Any, study_intent: str = "", @@ -293,6 +354,69 @@ def cohort_lint( return _model_dump(output) +def phenotype_recommendation_plan( + study_intent: str, + catalog_rows: List[Dict[str, Any]], + max_shortlist: int = 5, + llm_result: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + payload = PhenotypeRecommendationPlanInput( + study_intent=study_intent, + catalog_rows=catalog_rows, + max_shortlist=max_shortlist, + llm_result=llm_result, + ) + + allowed_ids = [r.get("phenotype_id") for r in payload.catalog_rows if r.get("phenotype_id")] + max_shortlist = max(0, min(payload.max_shortlist, len(allowed_ids))) + + plan = "Select a shortlist of phenotypes for deeper review against the study intent (stub if no LLM)." 
+ intent_facets: Dict[str, Any] = {} + shortlist_ids: List[str] = [] + invalid_ids: List[str] = [] + reasoning_notes: List[str] = [] + needs_more_search = False + mode = "llm" + + if payload.llm_result and isinstance(payload.llm_result.get("shortlist_ids"), list): + shortlist_ids, invalid_ids = _filter_shortlist_ids( + payload.llm_result.get("shortlist_ids") or [], + payload.catalog_rows, + max_shortlist, + ) + raw_intent_facets = payload.llm_result.get("intent_facets") + intent_facets = raw_intent_facets if isinstance(raw_intent_facets, dict) else {} + raw_reasoning_notes = payload.llm_result.get("reasoning_notes") + if isinstance(raw_reasoning_notes, list): + reasoning_notes = [str(note) for note in raw_reasoning_notes if note not in (None, "")] + elif isinstance(raw_reasoning_notes, str) and raw_reasoning_notes.strip(): + reasoning_notes = [raw_reasoning_notes.strip()] + else: + reasoning_notes = [] + needs_more_search = bool(payload.llm_result.get("needs_more_search")) + if payload.llm_result.get("plan"): + plan = payload.llm_result["plan"] + if not shortlist_ids and max_shortlist > 0: + mode = "llm_fallback" + shortlist_ids = [str(pid) for pid in allowed_ids[:max_shortlist]] + reasoning_notes.append("Fell back to top retrieved candidates after invalid or empty LLM shortlist.") + else: + mode = "stub" + shortlist_ids = [str(pid) for pid in allowed_ids[:max_shortlist]] + reasoning_notes = ["Stub shortlist from deterministic fallback (no LLM)."] + + output = PhenotypeRecommendationPlanOutput( + plan=plan, + intent_facets=intent_facets, + shortlist_ids=shortlist_ids, + needs_more_search=needs_more_search, + reasoning_notes=reasoning_notes, + mode=mode, + invalid_ids_filtered=invalid_ids, + ) + return _model_dump(output) + + def phenotype_recommendations( protocol_text: str, catalog_rows: List[Dict[str, Any]], @@ -306,34 +430,47 @@ def phenotype_recommendations( llm_result=llm_result, ) - allowed_ids = [r.get("cohortId") for r in payload.catalog_rows if 
r.get("cohortId") is not None] - allowed_set = {cid for cid in allowed_ids} + allowed_ids = [r.get("phenotype_id") for r in payload.catalog_rows if r.get("phenotype_id")] + allowed_set = {pid for pid in allowed_ids} max_results = max(0, min(payload.max_results, len(allowed_ids))) plan = "Suggest relevant phenotypes from catalog for the study intent (stub if no LLM)." recs: List[Dict[str, Any]] = [] - invalid_ids: List[int] = [] + invalid_ids: List[str] = [] mode = "llm" if payload.llm_result and isinstance(payload.llm_result.get("phenotype_recommendations"), list): raw_recs = payload.llm_result.get("phenotype_recommendations") or [] + allowed, suffix_map = _build_allowed_id_maps(payload.catalog_rows) invalid_ids = sorted( { - rec.get("cohortId") + str(rec.get("phenotype_id")) for rec in raw_recs - if rec.get("cohortId") not in allowed_set and rec.get("cohortId") is not None + if _resolve_catalog_phenotype_id(rec.get("phenotype_id"), allowed, suffix_map) is None + and rec.get("phenotype_id") not in (None, "") } ) recs = _filter_catalog_recs(raw_recs, payload.catalog_rows, max_results) if payload.llm_result.get("plan"): plan = payload.llm_result["plan"] + if not recs and payload.catalog_rows[:max_results]: + mode = "llm_fallback" + for row in payload.catalog_rows[:max_results]: + recs.append( + { + "phenotype_id": row.get("phenotype_id"), + "phenotype_name": row.get("phenotype_name") or row.get("name") or "", + "justification": "Fallback recommendation from top shortlisted candidates after invalid or empty LLM output.", + "confidence": None, + } + ) else: mode = "stub" for row in payload.catalog_rows[:max_results]: recs.append( { - "cohortId": row.get("cohortId"), - "cohortName": row.get("cohortName") or row.get("name") or "", + "phenotype_id": row.get("phenotype_id"), + "phenotype_name": row.get("phenotype_name") or row.get("name") or "", "justification": "Stub recommendation from deterministic fallback (no LLM).", "confidence": None, } @@ -500,6 +637,78 @@ def 
phenotype_intent_split( return _model_dump(output) +def _normalize_cohort_methods_outcomes(llm_result: Dict[str, Any]) -> tuple[str, List[str]]: + outcome_statement = str(llm_result.get("outcome_statement") or "") + outcome_statements: List[str] = [] + if isinstance(llm_result.get("outcome_statements"), list): + outcome_statements = [ + str(statement).strip() + for statement in llm_result["outcome_statements"] + if str(statement).strip() + ] + if not outcome_statements and outcome_statement.strip(): + outcome_statements = [outcome_statement.strip()] + if not outcome_statement.strip() and outcome_statements: + outcome_statement = outcome_statements[0] + return outcome_statement, outcome_statements + + +def cohort_methods_intent_split( + study_intent: str, + llm_result: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + payload = CohortMethodsIntentSplitInput( + study_intent=study_intent, + llm_result=llm_result, + ) + + if not payload.llm_result: + return {"error": "no_llm_response"} + + plan = "Extract target, comparator, and outcome cohort statements from the study intent." 
+ target_statement = str(payload.llm_result.get("target_statement") or "") + comparator_statement = str(payload.llm_result.get("comparator_statement") or "") + outcome_statement, outcome_statements = _normalize_cohort_methods_outcomes(payload.llm_result) + rationale = str(payload.llm_result.get("rationale") or "") + questions: List[str] = [] + if isinstance(payload.llm_result.get("questions"), list): + questions = [str(q) for q in payload.llm_result["questions"]] + if payload.llm_result.get("plan"): + plan = str(payload.llm_result["plan"]) + + raw_status = str(payload.llm_result.get("status") or "").strip().lower() + missing_statements = [ + name + for name, value in ( + ("target_statement", target_statement), + ("comparator_statement", comparator_statement), + ("outcome_statements", "present" if outcome_statements else ""), + ) + if not value.strip() + ] + if raw_status not in {"ok", "needs_clarification"}: + raw_status = "needs_clarification" if missing_statements else "ok" + if raw_status == "ok" and missing_statements: + return { + "error": "invalid_cohort_methods_intent_split", + "details": "status ok requires non-empty target_statement, comparator_statement, and outcome_statements", + "missing": missing_statements, + } + + output = CohortMethodsIntentSplitOutput( + status=raw_status, + plan=plan, + target_statement=target_statement, + comparator_statement=comparator_statement, + outcome_statement=outcome_statement, + outcome_statements=outcome_statements, + rationale=rationale, + questions=questions, + mode="llm", + ) + return _model_dump(output) + + def phenotype_validation_review( disease_name: str, llm_result: Optional[Dict[str, Any]] = None, diff --git a/docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md b/docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md new file mode 100644 index 0000000..82c8a30 --- /dev/null +++ b/docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md @@ -0,0 +1,119 @@ +# Cohort Methods Specifications 
Recommendation + +This document describes an ACP recommendation flow that suggests analytic +settings from a free-text description. + +The flow endpoint is: + +```text +/flows/cohort_methods_specifications_recommendation +``` + +One available R wrapper is: + +```r +OHDSIAssistant::suggestCohortMethodSpecs() +``` + +## Purpose + +The flow converts a free-text analytic-settings description into a structured +analytic-settings recommendation. + +## Data Flow + +```text +free-text analytic-settings description + -> ACP /flows/cohort_methods_specifications_recommendation + -> MCP cohort_methods_prompt_bundle + -> LLM response with cmAnalysis-shaped specifications + -> validation and defaults backfill + -> analytic-settings recommendation +``` + +## Request + +The R wrapper sends a small snake_case request body: + +```json +{ + "study_intent": "string", + "study_description": "string", + "analytic_settings_description": "string" +} +``` + +`analytic_settings_description` is required and must be non-empty. +`study_description` mirrors the same text for compatibility with other clients. 
+ +## Response + +The endpoint returns: + +```json +{ + "status": "ok", + "recommendation": { + "mode": "free_text", + "input_method": "typed_text", + "source": "acp_flow", + "status": "received", + "profile_name": "Recommended from free-text description", + "raw_description": "free-text analytic settings", + "study_population": {}, + "time_at_risk": {}, + "propensity_score_adjustment": {}, + "outcome_model": {}, + "deferred_inputs": { + "function_argument_description": "implemented", + "description_file_path": "implemented", + "interactive_typed_description": "implemented" + }, + "defaults_snapshot": {} + }, + "cohort_methods_specifications": {}, + "section_rationales": {}, + "diagnostics": {} +} +``` + +`status` can be: + +- `ok` +- `llm_parse_error` +- `schema_validation_error` + +`recommendation.status` can be: + +- `received`: the LLM output passed validation +- `backfilled`: at least one section was replaced with defaults + +When ACP is not connected, the R wrapper returns a local stub with the same broad +shape and `recommendation$source = "local_stub_no_acp"`. + +## Recommendation Shape + +The `recommendation` object has four analytic sections: + +- `study_population` +- `time_at_risk` +- `propensity_score_adjustment` +- `outcome_model` + +Internally, the ACP flow asks the LLM for a cmAnalysis-shaped specification and +projects that object into the four recommendation sections: + +- `study_population`: `createStudyPopArgs` without time-at-risk fields, plus + `getDbCohortMethodDataArgs` nested as `cohortMethodDataArgs` +- `time_at_risk`: `startAnchor`, `riskWindowStart`, `endAnchor`, and + `riskWindowEnd` +- `propensity_score_adjustment`: `trimByPsArgs`, `matchOnPsArgs`, + `stratifyByPsArgs`, and `createPsArgs` +- `outcome_model`: `fitOutcomeModelArgs` + +The full validated cmAnalysis-shaped object is returned as +`cohort_methods_specifications` for traceability. + +## TODO + +- Support projecting multiple cohort method analyses into the recommendation. 
diff --git a/docs/COHORT_METHODS_WORKFLOW.md b/docs/COHORT_METHODS_WORKFLOW.md new file mode 100644 index 0000000..26ba68d --- /dev/null +++ b/docs/COHORT_METHODS_WORKFLOW.md @@ -0,0 +1,110 @@ +**Cohort Methods Workflow** + +This document captures the current cohort-methods workflow implemented by `OHDSIAssistant::runStrategusCohortMethodsShell()` and how it fits into a broader Strategus execution pipeline. + +## Shell Workflow (Target/Comparator/Outcome + Analytic Settings) + +```mermaid +flowchart TD + A["Start: runStrategusCohortMethodsShell"] --> B["Enter Study Intent"] + B --> C["cohort_methods_intent_split"] + C --> D["Target Statement"] + C --> E["Comparator Statement"] + C --> F["Outcome Statement(s)"] + + D --> G["Target Recommendations or Cache Reuse"] + G --> H["Select Target Cohort"] + H --> I{"Do Target Improvements?"} + I -- "Yes" --> J["phenotype_improvements-target"] + J --> K{"Apply Improvements?"} + K -- "Yes" --> L["Patched Target Cohort"] + K -- "No" --> M["Keep Selected Target Cohort"] + I -- "No" --> M + + E --> N["Comparator Recommendations or Cache Reuse"] + N --> O["Select Comparator Cohort"] + O --> P{"Do Comparator Improvements?"} + P -- "Yes" --> Q["phenotype_improvements-comparator"] + Q --> R{"Apply Improvements?"} + R -- "Yes" --> S["Patched Comparator Cohort"] + R -- "No" --> T["Keep Selected Comparator Cohort"] + P -- "No" --> T + + F --> U["Outcome Recommendations or Cache Reuse"] + U --> V["Select Outcome Cohort(s)"] + V --> W{"Do Outcome Improvements?"} + W -- "Yes" --> X["phenotype_improvements-outcome"] + X --> Y{"Apply Improvements?"} + Y -- "Yes" --> Z["Patched Outcome Cohort(s)"] + Y -- "No" --> AA["Keep Selected Outcome Cohort(s)"] + W -- "No" --> AA + + L --> AB["Write Cohort Role + Comparison Artifacts"] + M --> AB + S --> AB + T --> AB + Z --> AB + AA --> AB + + AB --> AC["Capture Negative Control + Covariate Concept-Set Placeholders"] + AC --> AD{"Analytic Settings Mode"} + + AD -- "step_by_step" --> AE["Study 
Population Settings"] + AE --> AF["Time-at-Risk Settings"] + AF --> AG["Propensity Score Adjustment Settings"] + AG --> AH["Outcome Model Settings"] + AH --> AI["Enter Profile Name"] + AI --> AJ["Review Resolved Settings"] + + AD -- "free_text" --> AO["cohort_methods_specifications_recommendation"] + AO --> AP{"ACP Available?"} + AP -- "Yes" --> AQ["ACP Recommendation"] + AP -- "No or Error" --> AR["Local Stub/Fallback Recommendation"] + AQ --> AS["Review Recommendation"] + AR --> AS + + AJ --> AT["Confirm Analytic Settings"] + AS --> AT + AT --> AU["Write Outputs + Generate Scripts 02-06"] + AU --> AV["End"] +``` + +## Strategus Execution Context + +```mermaid +flowchart TD + A["Study Intent"] --> B["runStrategusCohortMethodsShell"] + B --> C["Outputs: cohorts + comparisons + analytic settings + scripts"] + + C --> D["03_generate_cohorts.R"] + D --> E["CohortGenerator"] + E --> F["Cohort Table in CDM"] + + C --> G["04_keeper_review.R"] + G --> H["Keeper Case Review"] + H --> I["Optional: refine phenotypes"] + I --> B + + C --> J["05_diagnostics.R"] + J --> K["CohortDiagnostics"] + + C --> L["outputs/cm_analysis_defaults.json"] + C --> M["analysis-settings/cmAnalysis.json"] + C --> N["outputs/cm_comparisons.json"] + C --> O["selected or patched cohort definitions"] + + L --> P["06_cm_spec.R"] + M --> P + N --> P + O --> P + F --> P + K --> P + + P --> Q["analysis-settings/analysisSpecification.json"] + Q --> R["Shared Cohort Resource"] + Q --> S["CharacterizationModule Spec"] + Q --> T["CohortIncidenceModule Spec"] + Q --> U["CohortMethodModule Spec"] + Q --> V["Strategus::execute"] + V --> W["CohortMethod Results + Strategus Execute Result"] +``` diff --git a/docs/PHENOTYPE_CIPHER_MAPPING.md b/docs/PHENOTYPE_CIPHER_MAPPING.md new file mode 100644 index 0000000..ef32dda --- /dev/null +++ b/docs/PHENOTYPE_CIPHER_MAPPING.md @@ -0,0 +1,926 @@ +# Phenotype Metadata Mapping: OHDSI Cohorts + VA CIPHER + +This note compares the current OHDSI Phenotype Library metadata 
source in `data/Cohorts.csv` with the sample VA CIPHER phenotype JSON records in `data/cipher-phenotypes/`. + +The purpose of the index is not only phenotype search. The more important use case is phenotype recommendation in support of cohort generation within the OHDSI study workflow, typically in Atlas or R Hades. That changes the design: the index must capture both topical relevance and how directly a phenotype can be turned into an OHDSI-executable cohort. + +## Inputs Reviewed + +- `data/Cohorts.csv` +- `data/phenotype_index/catalog.jsonl` +- `mcp_server/scripts/build_phenotype_index.py` +- Sample CIPHER JSON files in `data/cipher-phenotypes/` +- `data/cipher-phenotypes/enumType 1.json` +- `data/cipher-phenotypes/cipher-disease-phenotype-summary.xlsx` + +## Revised Design Goal + +The shared phenotype index should support three related tasks: + +- finding relevant phenotype definitions +- recommending phenotype definitions that are useful starting points for a study design +- preparing accepted recommendations for downstream translation into an OHDSI cohort definition + +This means the index should model both: + +- `clinical_relevance`: what condition, phenotype, or detection strategy the record represents +- `execution_readiness`: how directly the record can help produce an OHDSI cohort definition + +## Current OHDSI Index Shape + +The current index builder emits compact rows with this shape: + +- `cohortId` +- `name` +- `short_description` +- `tags` +- `ontology_keys` +- `signals` +- `logic_features` +- `pop_keywords` +- `source_meta` + +This works for OHDSI because `Cohorts.csv` is already flattened and heavily oriented around cohort logic features such as: + +- inclusion rule counts +- concept set counts +- entry-event domains +- status, librarian, dates +- hashtag tags +- recommended concept ids + +## CIPHER Structure + +Each sample CIPHER phenotype record has a stable two-level structure. 
+ +Top level: + +- `id` +- `fullName` +- `description` +- `created` +- `lastModified` +- `revision` +- `majorRevision` +- `versionInfo` +- `uqid` +- `vaDeveloped` +- `phenotypeStatusId` +- `categoryTypeId` +- `dbType` +- `phenotypeCategory` +- `sources` +- `roleAnalyses` +- `dataClassifications` +- `keywords` +- `publications` +- `toolLinks` +- `algorithm` + +Nested `algorithm` block: + +- `algorithmDesc` +- `populationDesc` +- `validated` +- `validationDescription` +- `adjudicationPerformed` +- `adjudicationMethod` +- `adjudicationLevelType` +- `authors` +- `contacts` +- `relatedDiseases` +- `methodsUsed` +- `validations` +- `assocCodes` +- `contextDevs` +- `labSpecimen` +- `labUnits` +- `algorithmCreated` +- `dataUsedStart` +- `dataUsedEnd` +- `publicationAcknowledgement` + +## Important Differences + +OHDSI and CIPHER differ in where their recommendation value lives: + +- OHDSI stores useful retrieval features in flattened cohort-logic columns and executable cohort definitions. +- CIPHER stores useful retrieval features in narrative algorithm text, keyword lists, source lists, validation fields, and associated code systems. +- Most CIPHER records in the provided sample do not contain executable OHDSI logic. + +Practical consequence: + +- OHDSI is stronger on immediate cohort adaptation. +- CIPHER is stronger on disease relevance, provenance, validation context, and code evidence. +- CIPHER recommendations often need a second step that translates narrative and code evidence into an OHDSI cohort definition. + +That means a shared index should not force both sources into the same narrow `logic_features` mold, and it should not treat all recommended phenotypes as equally executable. + +## Proposed Shared Metadata Model + +Use a source-agnostic phenotype document with a stable core plus source-specific extensions. 
+ +### Core Fields + +- `phenotype_id` +- `source_dataset` +- `source_record_type` +- `name` +- `short_description` +- `long_description` +- `tags` +- `keywords` +- `signals` +- `ontology_keys` +- `code_systems` +- `concept_evidence` +- `validation_features` +- `population_features` +- `provenance` +- `retrieval_text` +- `source_meta` +- `source_payload_ref` + +### Execution-Oriented Fields + +- `executable_definition_status` +- `executable_definition_source` +- `execution_readiness_score` +- `adaptation_notes` +- `translation_inputs` + +### Recommended Meanings + +- `phenotype_id`: Canonical string id, namespaced by source, such as `ohdsi:3` or `cipher:16285`. +- `source_dataset`: `ohdsi_phenotype_library` or `va_cipher`. +- `source_record_type`: `cohort_definition` or `disease_phenotype`. +- `name`: Display name. +- `short_description`: One-sentence summary used in results lists. +- `long_description`: Rich narrative text for ranking and recommendation context. +- `tags`: User-facing labels for faceting and retrieval. +- `keywords`: Expanded search terms from names, tags, keywords, disease labels, authors, source labels, code-system labels. +- `signals`: Compact flags such as status, validated, va-developed, reference, has-publication, has-tool-link. +- `ontology_keys`: Numeric or string identifiers from concept systems when available. +- `code_systems`: Normalized summary of coded algorithm content. +- `concept_evidence`: Compact representation of code-derived evidence that may later be enriched through vocabulary or PHOEBE tools. +- `validation_features`: Structured summary of validation and adjudication signals. +- `population_features`: Structured summary of study population or cohort target information. +- `provenance`: Source and authorship metadata useful for ranking and display. +- `retrieval_text`: Explicit text blob used for sparse and dense indexing. +- `source_meta`: Raw-ish compact metadata for debugging and UI. 
+- `source_payload_ref`: Path or id pointing back to the original source definition. +- `executable_definition_status`: `native_ohdsi`, `non_ohdsi_logic_only`, `codes_only`, `narrative_only`, or `unknown`. +- `executable_definition_source`: where the executable or semi-executable logic came from, such as `ohdsi_library`, `cipher_json`, or `external_reference`. +- `execution_readiness_score`: coarse ranking signal for recommendation and UI prioritization. +- `adaptation_notes`: short explanation of what still needs to be done before OHDSI execution. +- `translation_inputs`: compact payload intended for a future ACP flow that translates accepted phenotype recommendations into OHDSI cohort candidates. + +## Mapping: OHDSI -> Shared Model + +- `cohortId` -> `phenotype_id` as `ohdsi:{cohortId}` +- literal source -> `source_dataset=ohdsi_phenotype_library` +- literal type -> `source_record_type=cohort_definition` +- `cohortName` / `cohortNameLong` / `cohortNameFormatted` -> `name` +- `logicDescription` or `notes` -> `short_description` +- `logicDescription`, `notes`, definition JSON description if present -> `long_description` +- `hashTag` -> `tags` +- tokenized `name` + description + tags -> `keywords` +- `recommendedReferentConceptIds` -> `ontology_keys` +- current status/reference/washout booleans -> `signals` +- inclusion/domain/count fields -> `population_features.logic_features` +- librarian/dates/version fields -> `provenance` and `source_meta` +- executable cohort JSON presence -> `executable_definition_status=native_ohdsi` +- OHDSI cohort JSON -> `executable_definition_source=ohdsi_library` +- cohort JSON path, logic summary, domains, referent concepts -> `translation_inputs` + +## Mapping: CIPHER -> Shared Model + +- `id` -> `phenotype_id` as `cipher:{id}` +- literal source -> `source_dataset=va_cipher` +- literal type -> `source_record_type=disease_phenotype` +- `fullName` -> `name` +- `description` if present else `algorithm.algorithmDesc` -> 
`short_description` +- concatenate `description`, `algorithm.algorithmDesc`, `algorithm.populationDesc`, `validationDescription`, publication acknowledgement -> `long_description` +- `keywords[*].keyword`, `phenotypeCategory`, inferred source labels, code-system labels -> `tags` and `keywords` +- `algorithm.relatedDiseases[*].relatedDiseaseId` -> provisional `ontology_keys` only after enum expansion; otherwise keep outside `ontology_keys` +- `vaDeveloped`, `phenotypeStatusId`, `majorRevision`, `validated`, publication presence, tool-link presence -> `signals` +- `algorithm.populationDesc`, `contextDevs`, `dataUsedStart`, `dataUsedEnd` -> `population_features` +- `algorithm.validated`, `validationDescription`, `adjudicationPerformed`, `adjudicationMethod`, `adjudicationLevelType`, `validations` -> `validation_features` +- `sources`, `authors`, `contacts`, `publications`, `versionInfo`, `revision`, `created`, `lastModified`, `uqid` -> `provenance` and `source_meta` +- `algorithm.assocCodes` -> `code_systems` +- raw JSON path -> `source_payload_ref` +- default executable status for most records -> `codes_only` or `narrative_only` +- future true logical algorithm if present -> `non_ohdsi_logic_only` +- disease summary, algorithm narrative, code evidence, validation, and provenance -> `translation_inputs` + +## Execution Readiness + +The recommendation system should distinguish “good conceptual match” from “good starting point for Atlas/Hades implementation.” + +Recommended values for `executable_definition_status`: + +- `native_ohdsi`: already represented as an OHDSI cohort definition +- `non_ohdsi_logic_only`: contains a logical algorithm, but not in OHDSI-executable form +- `codes_only`: primarily useful because of code evidence +- `narrative_only`: primarily useful because of descriptive or methodological text +- `unknown`: unresolved + +Recommended ranking behavior: + +- do not suppress CIPHER phenotypes just because they are less executable +- do boost OHDSI 
phenotypes when user intent strongly implies immediate Atlas/Hades adaptation +- do surface execution status and adaptation notes in recommendation output + +Typical `adaptation_notes` examples: + +- `Native OHDSI cohort likely requires parameter or concept-set adjustment for local study intent.` +- `CIPHER phenotype provides code evidence and narrative but requires translation into OHDSI cohort entry, exit, and era logic.` +- `Phenotype appears derived from PheCode/MAP methodology and may need concept expansion and validation against available OMOP domains.` + +## Translation Inputs + +`translation_inputs` should be designed now as the future handoff payload for the ACP flow that converts an accepted recommendation into an OHDSI-oriented cohort draft. + +Recommended content for OHDSI records: + +- cohort id +- cohort JSON path +- logic description +- referent concept ids +- domain features +- inclusion rule counts +- concept set counts + +Recommended content for CIPHER records: + +- phenotype id and JSON path +- phenotype name +- disease summary +- algorithm narrative +- population description +- validation description +- code systems and codes +- source family labels +- publication links +- tool links +- source provenance +- code-generation method notes if present + +## Normalized `code_systems` Shape + +This is the most important CIPHER addition for retrieval, recommendation, and later translation. + +Recommended shape: + +```json +[ + { + "system_id": 460, + "system_name": "ICD-9 Diagnostic Codes", + "subsystem_id": null, + "subsystem_name": null, + "codes": ["309.81"], + "description": null, + "va_specific": false + } +] +``` + +For the sample, `enumType 1.json` is enough to map at least the associated code systems. 
Examples already present in the sample: + +- `460` -> `ICD-9 Diagnostic Codes` +- `461` -> `ICD-10 Diagnostic Codes` +- `466` -> `Medications` +- `468` -> `Text snippets` +- `469` -> `SNOMED CT, US Edition` +- `519` -> `Other` +- nested `682` -> `Drug class` +- nested `785` appears in sample data but is not defined in `enumType 1.json`; this needs an additional enum source + +Recommendation: + +- keep both numeric ids and resolved labels +- flatten actual code strings into retrieval text +- add code-system labels into keywords +- preserve `va_specific` for downstream filtering + +## Proposed `concept_evidence` + +This field is intended to preserve code-derived evidence without turning the phenotype index into a full terminology mirror. + +Recommended compact shape: + +```json +{ + "coded_terms": [ + { + "system": "ICD-10 Diagnostic Codes", + "codes": ["F43.10", "F43.11", "F43.12"], + "labels": [], + "omop_candidates": [], + "embedding_terms": [] + } + ], + "coverage_summary": { + "has_codes": true, + "has_labels": false, + "has_omop_mapping": false + } +} +``` + +Indexing recommendation: + +- always preserve raw codes and code-system labels +- do not require concept names to be present +- if names are present in source data, store them +- if names are absent, leave them empty and enrich later when needed + +## Vocabulary and PHOEBE Enrichment Strategy + +The Study Agent now has MCP tools such as vocabulary vector search and PHOEBE related concept retrieval. That should influence the design, but not force large up-front enrichment. + +Recommended staged strategy: + +1. Base indexing: +- raw codes +- raw keywords +- narrative text +- execution-readiness metadata + +2. Lightweight enrichment at index build time when cheap: +- add code-system labels +- add any source-provided code descriptions +- derive embedding text from names, keywords, code-system names, and available labels + +3. 
Deferred enrichment after shortlist or acceptance: +- use `vocab_search_standard` to find likely OMOP standard concepts and labels +- use `phoebe_related_concepts` to gather nearby standard concepts useful for concept-set drafting or adaptation +- attach only compact summaries to the recommendation or downstream translation payload + +This keeps indexing tractable for 7K phenotypes while leaving room for stronger recommendation and translation support later. + +## PheCode / MAP / MVP / GWPheWAS Phenotypes + +Many CIPHER phenotypes appear to be derived from PheCode-based or related analytical methods. The tags mentioned for these kinds of records include: + +- `MAP` +- `MVP` +- `GW` +- `GWPheWAS` +- `PheCode` + +These should be treated as important methodological metadata, not just search tags. + +Why this matters: + +- they tell the user that the phenotype may have been generated from a statistical or analytical grouping process rather than from a hand-authored executable cohort definition +- they may imply different expectations for portability, specificity, and direct cohort executability +- they provide useful context for the later translation step into OHDSI logic + +Recommended handling: + +- preserve these tags in `tags` +- normalize them into `signals`, for example `method_family:phecode`, `method_family:map`, `method_family:gwphewas` +- extract short methodology blurbs from `description` or `algorithm.algorithmDesc` into a dedicated note inside `translation_inputs` +- boost their visibility in UI output so users understand what kind of phenotype evidence they are accepting + +Recommended optional field inside `translation_inputs`: + +```json +{ + "methodology_context": { + "family_tags": ["MAP", "PheCode"], + "summary": "Phenotype appears derived from PheCode-based analytical grouping with NLP and coding-frequency augmentation.", + "translation_cautions": [ + "May require OMOP concept-set expansion rather than direct code copy.", + "May represent a 
probabilistic or empirically derived grouping rather than a directly executable cohort algorithm." + ] + } +} +``` + +This is especially important because some phenotype descriptions include short blurbs describing how the codes were generated. Those blurbs should be preserved as user-facing context and downstream translation context. + +## Proposed `signals` + +Keep `signals` as cheap ranking and filtering hints, but expand them to support both sources: + +- `source:ohdsi` +- `source:cipher` +- `status:<phenotype_status>` +- `validated` +- `not_validated` +- `va_developed` +- `major_revision` +- `has_publication` +- `has_tool_link` +- `has_contact` +- `has_code_system:icd9` +- `has_code_system:icd10` +- `has_code_system:snomed` +- `has_code_system:medication` +- `reference` +- `washout` +- `method_family:phecode` +- `method_family:map` +- `method_family:mvp` +- `method_family:gw` +- `method_family:gwphewas` +- `execution:native_ohdsi` +- `execution:codes_only` +- `execution:narrative_only` + +## Proposed `provenance` + +Recommended compact shape: + +```json +{ + "created_at": "...", + "modified_at": "...", + "version": "...", + "status": "...", + "authors": ["..."], + "contacts": ["..."], + "sources": ["..."], + "publications": [ + {"title": "...", "link": "..."} + ], + "maintainer": "..." +} +``` + +OHDSI can populate this from librarian/version/date fields. + +CIPHER can populate this from: + +- `created` +- `lastModified` +- `versionInfo` +- `phenotypeStatusId` +- `algorithm.authors` +- `algorithm.contacts` +- `sources` +- `publications` + +## Retrieval Text Strategy + +The current builder embeds only: + +- `name` +- `short_description` +- `pop_keywords` + +That is too narrow for CIPHER and too narrow for execution-aware recommendation. 
+ +Recommended `retrieval_text` should include: + +- `name` +- `short_description` +- `long_description` +- tags and keywords +- code-system labels +- actual code strings +- source labels +- author labels when institution-like +- validation summary +- population summary +- execution-readiness cues +- PheCode or MAP methodology blurbs when present + +This should improve: + +- phenotype search by disease name +- recommendation by methodological similarity +- retrieval by terminology system +- retrieval by VA-specific provenance +- shortlist quality for later translation into OHDSI cohort logic + +## Batch Import Notes + +The workbook `cipher-disease-phenotype-summary.xlsx` does not appear to be the full metadata source for indexing. + +Observed properties: + +- first sheet has 3 columns: path, name, description +- another sheet has 2 columns for records missing descriptions +- additional sheets appear grouped by source family such as `CCW`, `SNOMED`, `Read code`, `PheCode`, `CART`, `ICD`, `HDR UK`, `Elixhauser`, `MAP` + +This workbook looks useful for: + +- inventory reconciliation +- coverage checks +- missing-description handling +- source-family grouping + +But the JSON files should remain the primary indexing source because they carry the richer metadata. + +## Gaps / Follow-Up Needed + +Some CIPHER ids in the sample are coded but not fully resolvable from `enumType 1.json` alone. + +Examples: + +- `phenotypeStatusId` +- `categoryTypeId` +- `phenotypeSourceId` +- `phenotypeRoleId` +- `phenotypeClassId` +- `visualToolId` +- `contextId` +- `methodUsedId` +- `relatedDiseaseId` +- some `subCodeType` values such as `785` + +Recommendation: + +1. Look for additional enum exports in the full CIPHER metadata dump. +2. Keep raw ids in the schema even when labels are unavailable. +3. Add best-effort labels only where the enum file is authoritative. + +## Recommended Indexer Direction + +Minimal-disruption path: + +1. 
Rename the internal concept from `cohort` to `phenotype` in catalog rows while keeping backward-compatible aliases if needed. +2. Introduce a new source-aware builder layer: +- OHDSI row parser +- CIPHER JSON parser +3. Emit one shared catalog schema for both sources. +4. Expand sparse and dense text inputs to use `retrieval_text`. +5. Add execution-readiness fields and `translation_inputs` so recommendation output is future-compatible with an ACP translation flow. +6. Preserve source-specific detail inside `source_meta`, `population_features`, `validation_features`, `code_systems`, and `concept_evidence`. +7. Keep vocabulary and PHOEBE enrichment lightweight during indexing and use deeper enrichment later during shortlist or acceptance. + +This keeps the current retrieval architecture intact while broadening the metadata foundation enough for mixed-source search, recommendation, and later cohort-translation workflows. + +## Next Sprint + +The current implementation now supports a mixed OHDSI + CIPHER catalog with shared `phenotype_id`-based rows, execution-readiness metadata, source-aware retrieval text, and lightweight CIPHER code-system normalization. + +The next sprint should focus on improving retrieval quality rather than expanding workflow scope. + +### Priorities + +1. Add derived retrieval keywords. +- Keep raw tags and raw source keywords. +- Add a new compact `retrieval_keywords` field produced from a constrained LLM extraction step. +- Optimize for short clinically meaningful terms, phenotype method terms, population cues, and execution cues. +- Avoid stop words, long narrative fragments, and generic filler. + +2. Add human-readable labels for coded evidence. +- Preserve raw codes as canonical structured data. +- Add concept or code labels wherever they can be resolved cheaply. +- Include those labels in retrieval-oriented text fields so ANN and sparse matching benefit from human-readable clinical language. + +3. 
Add OHDSI concept-set evidence to the index. +- Parse OHDSI cohort JSON concept sets during indexing. +- Retain concept ids, vocabulary ids, concept names, and lightweight grouping information. +- Represent OHDSI concept evidence in a normalized way parallel to CIPHER `code_systems` / `concept_evidence`. +- Use this mainly for disambiguation and scope refinement, not just direct matching. + +4. Separate raw metadata from derived retrieval metadata. +- Keep source-faithful fields such as raw tags, raw keywords, raw code systems, and provenance. +- Add derived fields such as `retrieval_keywords`, `retrieval_concept_labels`, and `methodology_summary`. +- Treat retrieval-facing derived fields as index optimization artifacts, not replacements for source metadata. + +5. Keep enrichment staged. +- Stage 1: source parsing plus cheap label extraction. +- Stage 2: offline LLM keyword derivation with caching. +- Stage 3: optional deeper vocabulary and PHOEBE enrichment for shortlist or acceptance flows rather than bulk index-time expansion. + +### Design Guidance + +- Do not let concept-level detail overwhelm phenotype-level meaning. +- Do not depend on full OMOP mapping completeness before adding label enrichment. +- Prefer compact derived features over verbose copied prose. +- Preserve enough structure so a future cohort-translation ACP flow can reuse the same indexed evidence. + +### Suggested Evaluation + +Before another major schema revision, assemble a small set of representative phenotype recommendation queries and compare: + +- current mixed-source index +- index plus derived retrieval keywords +- index plus concept-label enrichment +- index plus both + +Use that comparison to decide how much weight should come from narrative similarity versus concept evidence versus execution readiness. 
+ +## Recommendation Flow Refactor: Bounded Agentic Retrieval + +The current `phenotype_recommendation` flow is a one-shot reranker: + +- run `phenotype_search` +- truncate to a small candidate list +- send thin candidate rows to the LLM +- ask for final recommendations in one pass + +This is not a good fit for a small local chat-completion model. With `candidate_limit` kept low for latency and context reasons, the model often does not see the best executable OHDSI candidates. If `candidate_limit` is raised, the model receives more noise but not enough structured evidence to discriminate among phenotype types. + +### Current Limitation + +The recommendation LLM currently sees only the thin search payload: + +- `phenotype_id` +- `source_dataset` +- `name` +- `short_description` +- `tags` +- `signals` +- `executable_definition_status` +- `execution_readiness_score` +- search scores + +It does **not** see the richer indexed evidence already available in the catalog and definitions: + +- `retrieval_keywords` +- `retrieval_concept_labels` +- `methodology_summary` +- `code_systems` +- `concept_evidence` +- `adaptation_notes` +- full definition payloads under `definitions/` + +This causes several failure patterns: + +- complication or severity phenotypes can outrank diagnosis phenotypes for disease-identification intents +- repair/procedure phenotypes can outrank diagnosis phenotypes +- HDR UK / Read-code-centric phenotypes can outrank more locally useful OHDSI or VA-compatible alternatives +- medication-based intents are poorly separated from diagnosis-code phenotypes +- execution readiness is present but under-informed because the model lacks the phenotype details needed for discrimination + +### Design Goal + +Refactor recommendation into a **bounded agentic retrieval flow** that lets a small model request more evidence only for promising candidates. 
+ +This should remain tightly controlled: + +- no open-ended tool loop +- bounded number of tool calls +- bounded prompt sizes +- explicit intermediate JSON schemas +- deterministic fallbacks still available + +### Recommended Flow + +#### Stage 1: Intent Facet Extraction + +Use a small LLM or deterministic parser to extract compact facets from the study intent. + +Suggested facets: + +- condition or topic +- phenotype role: diagnosis, outcome, screening, severity, procedure, medication-based, risk score +- care setting: outpatient, inpatient, ED, any +- population cue: VA, veteran, older adults, pediatric, etc. +- validation preference +- OHDSI executability preference +- geography or coding preference if inferable: US/OMOP vs UK/Read-code + +This output should be small and cheap. + +#### Stage 2: Broad Recall Search + +Run `phenotype_search` as a recall step, not the final ranking step. + +Recommended changes: + +- use a larger search window than the final LLM shortlist, for example `top_k=20..40` +- keep the returned search rows compact +- allow optional search-time boosts or filters later based on extracted facets + +The goal here is to avoid losing the best OHDSI candidate before the model can inspect it. + +#### Stage 3: LLM Shortlist Decision + +Ask the LLM to select a small shortlist of phenotype ids for deeper inspection, not final recommendations yet. + +Output should include: + +- inferred facet summary +- shortlist phenotype ids +- optional reasons each candidate needs more evidence +- optional indication that another search page or reformulated query is needed + +This first reasoning step should operate over compact rows only. 
+ +#### Stage 4: Targeted Evidence Hydration + +For the shortlisted ids, fetch richer evidence using existing MCP tools: + +- `phenotype_fetch_summary` +- `phenotype_fetch_definition` for only the most ambiguous finalists + +Recommended fetch policy: + +- summary fetches: up to 6 to 8 candidates +- definition fetches: up to 2 to 3 candidates +- at most 2 search rounds total + +The summary payload is likely sufficient for most decisions if it includes: + +- `retrieval_keywords` +- `retrieval_concept_labels` +- `methodology_summary` +- `signals` +- `code_systems` +- `executable_definition_status` +- `execution_readiness_score` +- `adaptation_notes` + +Definition fetches should be reserved for cases such as: + +- OHDSI candidates with similar names but different inclusion logic +- CIPHER candidates whose methodology or scope is ambiguous +- medication-based or multi-domain intents where code evidence matters + +#### Stage 5: Final Recommendation Synthesis + +Ask the LLM for the final recommendation only after evidence hydration. + +Final output should still remain compact: + +- up to `max_results` +- concise justification +- optional confidence +- ideally an indication of whether the phenotype is immediately executable or requires translation + +### Suggested Tool / Flow Changes + +This does not require inventing an unrestricted autonomous agent. It requires a controlled multi-step ACP flow. 
+ +#### Keep Existing Tools + +- `phenotype_search` +- `phenotype_fetch_summary` +- `phenotype_fetch_definition` +- `phenotype_prompt_bundle` + +#### Add or Refactor ACP Stages + +Add a new intermediate ACP planning task for phenotype recommendation, for example: + +- `phenotype_recommendation_plan` + +Possible output shape: + +- `intent_facets` +- `shortlist_ids` +- `needs_more_search` +- `reasoning_notes` + +Then keep the current final task, but rename conceptually to: + +- `phenotype_recommendations_finalize` + +Its input should be hydrated candidate summaries rather than thin search rows. + +### Why This Is Better for a 4B Model + +A small model is more reliable when it must: + +- compare a modest number of compact candidates +- request richer evidence selectively +- reason over a short hydrated shortlist + +A small model is less reliable when it must: + +- rerank a large noisy list in one pass +- infer phenotype type from minimal text +- compensate for missing structured evidence + +The bounded agentic design shifts difficulty away from long-context semantic reranking and toward staged discrimination, which is a better fit for air-gapped small-model deployment. + +### Immediate Implementation Priority + +The next implementation step should be a **two-pass ACP refactor**, not a full free-form loop. + +Recommended first increment: + +1. add an intermediate planning schema for shortlist selection +2. run `phenotype_search` with broader recall +3. fetch summaries for shortlisted ids +4. run a second LLM call for final recommendation + +This is simpler than a full agent loop and should address the main current failure modes without materially increasing model size requirements. + +## Offline Recommendation Metadata Extraction + +The next indexing revision should add a second compact offline LLM extraction pass focused specifically on phenotype recommendation quality. 
+ +This is different from the existing `retrieval_keywords` pass: + +- `retrieval_keywords` improves lexical and embedding retrieval surfaces +- recommendation metadata should improve semantic discrimination between primary phenotype topic, phenotype role, care setting, and misleading study-context mentions + +### Design Rationale + +The current recommendation failures are not only caused by retrieval weights. They are caused by the index still mixing: + +- phenotypes that are primarily about a disease or diagnosis +- phenotypes that are primarily about a complication, severity index, or downstream outcome +- phenotypes that are primarily baseline covariates or comorbidities used in a study about another condition +- phenotypes whose source narrative mentions a disease only as study context rather than as the target phenotype topic + +A brittle rule-based classifier is not the preferred fix. The better approach is to use an offline constrained LLM extraction step during indexing, similar in spirit to the current keyword derivation pass, and then store compact structured fields that can be used deterministically at recommendation time. 
+ +### Proposed Extracted Fields + +Recommended recommendation-oriented fields: + +- `primary_clinical_topic` +- `secondary_topics` +- `phenotype_role` +- `care_setting_scope` +- `population_scope` +- `topic_mentions.primary_topics` +- `topic_mentions.context_only_topics` +- `topic_mentions.downstream_or_related_topics` +- `target_vs_context_conditions.target_conditions` +- `target_vs_context_conditions.context_conditions` +- `exclude_from_primary_topic_match` +- `recommendation_summary` + +Recommended meanings: + +- `primary_clinical_topic`: the narrow main phenotype topic the definition is intended to identify or characterize +- `secondary_topics`: clinically relevant related topics that are not the main phenotype topic +- `phenotype_role`: one of `diagnosis`, `outcome`, `complication`, `severity`, `screening`, `procedure`, `medication_based`, `risk_score`, `comorbidity_covariate`, `mixed`, or `unknown` +- `care_setting_scope`: `outpatient`, `inpatient`, `ed`, `mixed`, or `unspecified` +- `population_scope`: compact population cue if explicit, such as veterans or older adults +- `topic_mentions.primary_topics`: topics that are genuinely central to the phenotype itself +- `topic_mentions.context_only_topics`: topics mentioned only because of study context or deployment context +- `topic_mentions.downstream_or_related_topics`: complications, sequelae, outcomes, or closely related topics that should not be confused with the primary topic +- `target_vs_context_conditions.target_conditions`: conditions the phenotype is actually targeting +- `target_vs_context_conditions.context_conditions`: conditions present only as study background, index disease context, or downstream follow-up context +- `exclude_from_primary_topic_match`: short phrases useful for down-weighting misleading topic matches at recommendation time +- `recommendation_summary`: a compact recommendation-oriented statement of the phenotype's real focus + +### Expected Input To The Extraction Prompt + +The 
offline extraction step should be grounded in compact source-aware evidence already available in the index build: + +- `name` +- `short_description` +- `methodology_summary` +- `retrieval_keywords` +- `retrieval_concept_labels` +- `signals` +- `executable_definition_status` +- `execution_readiness_score` +- optionally a compact `long_description` excerpt if short description is sparse + +It should not require the full raw source JSON for every pass unless later evaluation shows that more context is necessary. + +### Expected Runtime Use + +These extracted fields should support recommendation in three ways: + +1. candidate display and summary hydration +- expose them through `phenotype_fetch_summary` +- use them in the hydrated candidate payload sent to the final recommendation step + +2. planner grounding +- the recommendation planning prompt should rely on these fields instead of inferring everything from noisy source narratives +- the planner should be able to distinguish primary topic from study context without having to reconstruct that distinction itself + +3. 
retrieval text and ranking +- selected fields such as `primary_clinical_topic`, `secondary_topics`, `phenotype_role`, and `recommendation_summary` should contribute to `retrieval_text` +- `context_only_topics` and `exclude_from_primary_topic_match` should not be treated as positive primary-topic evidence + +### Example Failure This Should Fix + +For a phenotype like `Hypertension (VA CAUSAL Methods)` that was used in a COVID-19 vaccine effectiveness study, the extracted metadata should look conceptually like: + +- `primary_clinical_topic`: hypertension +- `phenotype_role`: comorbidity_covariate +- `topic_mentions.context_only_topics`: COVID-19 +- `target_vs_context_conditions.target_conditions`: hypertension +- `target_vs_context_conditions.context_conditions`: COVID-19 +- `exclude_from_primary_topic_match`: used in COVID-19 outcomes study; baseline covariate phenotype + +That would let the recommendation stack avoid treating it as a COVID-19 diagnosis phenotype. + +For a phenotype like `Long COVID-19 (LATCH)`, the extracted metadata should preserve that it is a COVID-related phenotype while distinguishing it from incident acute COVID-19 diagnosis. 
+ +### Prompt Artifacts + +The design prompt bundle for this extraction lives under: + +- `mcp_server/prompts/phenotype/overview_phenotype_index_recommendation_metadata.md` +- `mcp_server/prompts/phenotype/spec_phenotype_index_recommendation_metadata.md` +- `mcp_server/prompts/phenotype/output_schema_phenotype_index_recommendation_metadata.json` + +### Recommended Next Implementation Step + +Implement this as a second offline cached LLM pass in `build_phenotype_index.py`, parallel to the current keyword derivation flow: + +- build a compact source-aware prompt payload per phenotype +- cache results by phenotype id plus source hash +- write normalized fields into each catalog row +- expose them in `phenotype_fetch_summary` +- update the two-pass recommendation flow to use these fields during planning and final recommendation + +This should be attempted before adding brittle deterministic phenotype-role heuristics. diff --git a/docs/PHENOTYPE_INDEXING.md b/docs/PHENOTYPE_INDEXING.md index 3874158..d6b572c 100644 --- a/docs/PHENOTYPE_INDEXING.md +++ b/docs/PHENOTYPE_INDEXING.md @@ -1,35 +1,232 @@ -**Phenotype Indexing (MCP)** +**Phenotype Indexing** -This guide explains how to build the phenotype index used by the MCP `phenotype_search` tool. +This guide explains how to build the phenotype retrieval index used by the MCP phenotype search and phenotype recommendation flows. -**Inputs** -1. Metadata CSV (e.g., from the [OHDSI Phenotype Library](https://github.com/OHDSI/PhenotypeLibrary.git) `insta/Cohorts.csv`) -2. Cohort definition JSON files (e.g., from the [OHDSI Phenotype Library](https://github.com/OHDSI/PhenotypeLibrary.git) `inst/cohorts/` folder) +**What The Builder Produces** +The index builder now supports a split workflow: -**Required Environment** -- `EMBED_URL` (OpenAI-compatible embedding endpoint) -- `EMBED_MODEL` (embedding model name) -- `EMBED_API_KEY` (optional if the endpoint requires auth) +1. 
Source parsing and sparse indexing +- builds `catalog.jsonl` +- builds `sparse_index.pkl` +- copies phenotype definition JSON into `definitions/` +- optionally derives compact LLM-assisted `retrieval_keywords` + +2. Dense indexing +- builds `dense.index` +- builds or updates `embedding_cache.pkl` +- can run during the main build or later in a dense-only pass using an existing `catalog.jsonl` + +**Supported Source Inputs** +You can build from either or both of these source families: + +1. OHDSI / OMOP phenotype library inputs +- metadata CSV such as `data/Cohorts.csv` +- cohort definition JSON directory such as `data/cohorts/` + +2. VA CIPHER phenotype inputs +- phenotype JSON directory such as `data/cipher-phenotypes/` +- enum JSON such as `data/cipher-phenotypes/enumType 1.json` + +**Important Path Rule** +`PHENOTYPE_INDEX_DIR` points to the built index output directory. +It does not control where OHDSI or CIPHER source files reside. +The source files can live anywhere as long as you pass their paths on the command line. + +**Prerequisites** +Minimum prerequisites for catalog plus sparse index: +- Python environment with this repo installed +- OHDSI and/or CIPHER source files + +Additional prerequisites for LLM-derived retrieval keywords: +- `LLM_API_URL` +- `LLM_API_KEY` +- `LLM_MODEL` + +Additional prerequisites for dense index creation: +- `numpy` +- `faiss` +- `EMBED_URL` +- `EMBED_MODEL` +- `EMBED_API_KEY` if your embedding endpoint requires auth + +**Main Build Command** +This builds the catalog, sparse index, metadata, copied definitions, and optional keyword cache. 
+ +```bash +python mcp_server/scripts/build_phenotype_index.py \ + --metadata-csv data/Cohorts.csv \ + --definitions-dir data/cohorts \ + --cipher-dir data/cipher-phenotypes \ + --cipher-enum "data/cipher-phenotypes/enumType 1.json" \ + --output-dir data/phenotype_index_cipher_omop +``` + +**Enable LLM-Derived Retrieval Keywords** +This uses chat completion to derive compact `retrieval_keywords` and writes successful generations to `keyword_cache.jsonl`. -**Command** ```bash python mcp_server/scripts/build_phenotype_index.py \ - --metadata-csv /path/to/Cohorts.csv \ - --definitions-dir /path/to/cohorts \ - --output-dir /path/to/phenotype_index \ - --build-dense -``` - -**Outputs** -The output directory will contain: -1. `catalog.jsonl` – compact phenotype documents -2. `sparse_index.pkl` – pure‑Python BM25 index -3. `dense.index` – FAISS index (if `--build-dense` is enabled) -4. `meta.json` – index metadata (embedding model, build time, counts) -5. `definitions/` – copies of cohort JSON definitions - -**Notes** -1. If FAISS/numpy are not installed, omit `--build-dense` or install them first. -2. Indexing is safe to run repeatedly; it rebuilds the directory contents. -3. Set `PHENOTYPE_INDEX_DIR` in your MCP environment to point at the output directory (prefer an absolute path). -4. If `PHENOTYPE_INDEX_DIR` is not set, MCP falls back to the repo-relative default `data/phenotype_index`. + --metadata-csv data/Cohorts.csv \ + --definitions-dir data/cohorts \ + --cipher-dir data/cipher-phenotypes \ + --cipher-enum "data/cipher-phenotypes/enumType 1.json" \ + --derive-keywords-llm \ + --output-dir data/phenotype_index_cipher_omop +``` + +Notes: +- `keyword_cache.jsonl` is append-oriented and is written incrementally as successful LLM keyword generations occur. +- If no LLM keyword generations succeed, no cache file is created. +- The built `catalog.jsonl` still contains `retrieval_keywords` even when they come from the heuristic fallback rather than the LLM. 
+ +**Build Sparse And Dense In One Pass** +Use this when embedding infrastructure is available and you want the full index in one run. + +```bash +python mcp_server/scripts/build_phenotype_index.py \ + --metadata-csv data/Cohorts.csv \ + --definitions-dir data/cohorts \ + --cipher-dir data/cipher-phenotypes \ + --cipher-enum "data/cipher-phenotypes/enumType 1.json" \ + --derive-keywords-llm \ + --build-dense \ + --output-dir data/phenotype_index_cipher_omop +``` + +If the build must fail when dense indexing cannot be produced, add: + +```bash +--require-dense +``` + +**Build Dense Later From An Existing Catalog** +Use this when the catalog and sparse index already exist and you want to add `dense.index` afterward. + +```bash +python mcp_server/scripts/build_phenotype_index.py \ + --output-dir data/phenotype_index_cipher_omop \ + --build-dense \ + --dense-only +``` + +This mode: +- reads the existing `catalog.jsonl` +- builds `dense.index` +- updates `embedding_cache.pkl` +- updates the `dense` section in `meta.json` + +This mode does not: +- rebuild `catalog.jsonl` from source phenotype files +- rebuild `sparse_index.pkl` +- rerun LLM keyword derivation + +**Output Directory Contents** +After the main build, the output directory typically contains: + +1. `catalog.jsonl` +- shared phenotype documents for OHDSI and CIPHER sources +- includes derived fields such as `retrieval_keywords`, `retrieval_keywords_source`, `retrieval_concept_labels`, and `methodology_summary` + +2. `sparse_index.pkl` +- BM25-style sparse retrieval index + +3. `meta.json` +- build counts +- dense build status +- keyword derivation metadata +- embedding configuration metadata + +4. `definitions/` +- copied phenotype or cohort definition JSON files + +5. `keyword_cache.jsonl` +- optional append-only cache of successful LLM-derived retrieval keyword generations + +After dense indexing, the directory may also contain: + +6. `dense.index` +- FAISS dense index + +7. 
`embedding_cache.pkl` +- cached text embeddings keyed by hash of `retrieval_text` + +**Recommended Environment Variable** +Point the MCP retrieval layer at the built index directory: + +```bash +export PHENOTYPE_INDEX_DIR=/absolute/path/to/data/phenotype_index_cipher_omop +``` + +If `PHENOTYPE_INDEX_DIR` is not set, the retrieval layer falls back to the repo-relative default `data/phenotype_index`. + +**Sanity Check** +After a build, you can inspect the index quickly with: + +```bash +python - <<'PY' +from study_agent_mcp.retrieval.index import PhenotypeIndex +idx = PhenotypeIndex("data/phenotype_index_cipher_omop", allow_dense=False).load() +print(idx.meta.get("catalog_count")) +print(idx.fetch_summary("ohdsi:2")) +print(idx.fetch_summary("cipher:1976")) +print(idx.meta.get("keyword_derivation")) +PY +``` + +**Dense Status Check** +To confirm whether dense indexing was created: + +```bash +python - <<'PY' +import json +from pathlib import Path +meta = json.loads(Path("data/phenotype_index_cipher_omop/meta.json").read_text()) +print(meta.get("dense")) +PY +``` + +**Metadata Indexing Check** + +```bash +/bin/sh -lc "python - <<'PY' +import json +from pathlib import Path +wanted = { + 'ohdsi:482','ohdsi:794','ohdsi:299','ohdsi:417','ohdsi:77','ohdsi:888', + 'ohdsi:979','ohdsi:1303','ohdsi:938','ohdsi:577','ohdsi:1347', + 'cipher:16285','cipher:4032','cipher:3962','cipher:16273','cipher:16291' +} +path = Path('data/phenotype_index/catalog.jsonl') +rows = {} +for line in path.read_text().splitlines(): + if not line.strip(): + continue + row = json.loads(line) + pid = row.get('phenotype_id') + if pid in wanted: + rows[pid] = { + 'phenotype_id': pid, + 'name': row.get('name'), + 'recommendation_metadata_source': row.get('recommendation_metadata_source'), + 'primary_clinical_topic': row.get('primary_clinical_topic'), + 'phenotype_role': row.get('phenotype_role'), + 'care_setting_scope': row.get('care_setting_scope'), + 'population_scope': row.get('population_scope'), + 
'target_vs_context_conditions': row.get('target_vs_context_conditions'), + 'exclude_from_primary_topic_match': row.get('exclude_from_primary_topic_match'), + 'recommendation_summary': row.get('recommendation_summary'), + } +print(json.dumps(rows, indent=2, sort_keys=True)) +PY" +``` + + +**Operational Notes** +1. For large builds, a practical workflow is: +- build catalog plus sparse index first +- optionally enable LLM-derived keywords +- add dense indexing in a separate `--dense-only` pass + +2. The builder is safe to rerun, but the main build rewrites `catalog.jsonl`, `sparse_index.pkl`, and `meta.json`. + +3. Dense-only mode is intended specifically to avoid rerunning the main sparse build when only `dense.index` is missing. diff --git a/docs/PHENOTYPE_RECOMMENDATION_DESIGN.md b/docs/PHENOTYPE_RECOMMENDATION_DESIGN.md index e18844d..b1fa1b6 100644 --- a/docs/PHENOTYPE_RECOMMENDATION_DESIGN.md +++ b/docs/PHENOTYPE_RECOMMENDATION_DESIGN.md @@ -1,134 +1,120 @@ **Overview** -This document defines the `phenotype_recommendation` capability in the ACP + MCP architecture. The MCP service owns the phenotype index on local disk and exposes read-only retrieval tools. ACP only orchestrates LLM calls and tool invocations, and core remains pure/deterministic for validation and filtering. - -**Goals** -1. Move recall outside the LLM by using a hybrid retrieval index. -2. Send the LLM only a small candidate set for ranking and justification. -3. Keep index ownership inside MCP for air-gapped deployment. -4. Support regular updates from OHDSI Phenotype Library exports. - -**Non-Goals** -1. No direct DB/OMOP access in MCP tools. -2. No write or edit operations exposed through MCP tools. -3. No heavy external infrastructure dependencies for sparse search. - -**Components** -1. MCP Retrieval Layer - - Owns index storage on local disk. - - Exposes search and preview tools. -2. ACP Orchestration - - Calls MCP tools to retrieve candidates. - - Calls LLM to rank and justify. 
- - Validates LLM output via core. -3. Core Validation - - `phenotype_recommendations(...)` merges or filters LLM results against the candidate set. - -**Index Data Model** -Each phenotype is stored as a compact JSON document (one line per document): -1. `cohortId` -2. `name` -3. `short_description` -4. `tags` -5. `ontology_keys` -6. `signals` -7. `logic_features` -8. `pop_keywords` -9. `source_meta` - -**Index Directory Layout** -Default root is `PHENOTYPE_INDEX_DIR` or repo-relative `data/phenotype_index` (resolved from the MCP package location). -1. `catalog.jsonl` (compact phenotype docs) -2. `sparse_index.pkl` (pure-Python BM25-style index) -3. `dense.index` (FAISS index) -4. `meta.json` (index metadata) -5. `definitions/` (optional raw cohort JSON by `cohortId.json`) - -**Embedding Strategy** -1. Embed only `name + short_description + pop_keywords`. -2. Use the local embedding API: - - URL: `EMBED_URL` (default `http://localhost:3000/ollama/api/embed`) - - Model: `EMBED_MODEL` (default `qwen3-embedding:4b`) - - Key: `EMBED_API_KEY` (optional) -3. Cache embeddings by `(cohortId, input_text_hash)` to avoid recompute. - -**Sparse Retrieval Strategy** -1. Tokenize text using a simple regex tokenizer. -2. Build an inverted index with term frequencies. -3. Score with a lightweight BM25-style formula. -4. Store postings and doc lengths in `sparse_index.pkl`. - -**Hybrid Retrieval Flow** -1. Embed the query text (dense). -2. Run dense search (FAISS) for top-N. -3. Run sparse search (BM25) for top-N. -4. Merge scores using weighted sum or RRF. -5. Return top-K compact candidates to ACP/LLM. - -**MCP Tools (Read-Only)** -1. `phenotype_search(query, top_k=20)` -2. `phenotype_fetch_summary(cohortId)` -3. `phenotype_fetch_definition(cohortId, truncate=true)` -4. `phenotype_list_similar(cohortId, top_k=10)` -5. `phenotype_prompt_bundle(task)` (returns overview/spec/output_schema) -6. 
`phenotype_index_status()` (returns index path + file existence for preflight checks) - -**ACP Orchestration** -1. User submits study intent to ACP. -2. ACP calls `phenotype_search` to get top-K candidates. -3. ACP calls `phenotype_prompt_bundle` to fetch prompt assets. -4. ACP calls LLM with candidates for ranking and justification. -5. ACP validates with `core.phenotype_recommendations(...)`. - -Candidate selection: -1. ACP truncates the candidate list before the LLM using `LLM_CANDIDATE_LIMIT` or per-request `candidate_limit`. -2. ACP supports `candidate_offset` to request the next window of candidates from MCP `phenotype_search` - (for example, offset by `candidate_limit` to avoid re-sending the same top hits). - -**Phenotype Improvements Scope** -1. The improvements flow reviews one phenotype definition at a time. -2. If multiple cohorts are provided, ACP uses the first cohort only. -3. If the cohort JSON has no `id`, ACP injects a synthetic `id` for validation only and does not write it back. - -**LLM Formats** -1. Default: OpenAI Chat Completions payload (`/v1/chat/completions`-style). -2. Optional: OpenAI Responses payload (`/v1/responses`-style) enabled with `LLM_USE_RESPONSES=1`. -3. This setting only changes request/response formatting for the LLM API; it does not affect MCP tool usage. - -**Update and Reindex** -1. MCP exposes `POST /phenotypes/reindex` for manual refresh. -2. Index build script accepts CSV metadata + JSON cohort definitions. -3. Regular updates are expected; rebuild is safe and idempotent. - -**Configuration** -1. `PHENOTYPE_INDEX_DIR` (default `data/phenotype_index`) -2. `EMBED_URL` (default `http://localhost:3000/ollama/api/embed`) -3. `EMBED_MODEL` (default `qwen3-embedding:4b`) -4. `EMBED_API_KEY` (optional) -5. `PHENOTYPE_DENSE_WEIGHT` (default `0.6`) -6. `PHENOTYPE_SPARSE_WEIGHT` (default `0.4`) -7. `LLM_API_URL` (default `http://localhost:3000/api/chat/completions`) -8. `LLM_API_KEY` (required for LLM calls) -9. 
`LLM_MODEL` (default `agentstudyassistant`) -10. `LLM_TIMEOUT` (default `180`) -11. `LLM_LOG` (default `0`) enables verbose LLM logging in the ACP logger (config, prompt, raw response). -12. `LLM_DRY_RUN` (default `0`) -13. `LLM_USE_RESPONSES` (default `0`) selects OpenAI Responses API format instead of Chat Completions. It does not affect MCP tool use. -14. `LLM_CANDIDATE_LIMIT` (default `10`) -15. `STUDY_AGENT_MCP_ONESHOT` (default `0`, forced on Windows) runs MCP in per-request oneshot mode to avoid stdio lockups. -16. `STUDY_AGENT_BASE_DIR` (optional) base directory for resolving relative paths (index dir, banner, outputs). -17. `STUDY_AGENT_THREADING` (default `1`) uses a threaded HTTP server for ACP. Set to `0` to disable. -18. `STUDY_AGENT_HOST` (default `127.0.0.1`) -19. `STUDY_AGENT_PORT` (default `8765`) -20. `STUDY_AGENT_MCP_CWD` (optional) working directory passed to MCP subprocesses. Use for stable relative paths. -21. `MCP_LOG_LEVEL` (default `INFO`) controls MCP logger verbosity (`DEBUG|INFO|WARN|ERROR|OFF`). -22. `STUDY_AGENT_MCP_URL` (optional) HTTP MCP endpoint. When set, ACP uses HTTP and ignores `STUDY_AGENT_MCP_COMMAND`. -23. `STUDY_AGENT_MCP_TOKEN` (optional) bearer token passed to MCP over HTTP. -24. `STUDY_AGENT_MCP_TIMEOUT` (default `30`) HTTP MCP request timeout in seconds. - -**Risks and Mitigations** -1. Missing dependencies for FAISS - - Mitigation: allow sparse-only mode with explicit warning. -2. Inconsistent or missing metadata fields - - Mitigation: robust fallbacks when building catalog rows. -3. Large updates - - Mitigation: incremental caching by text hash, batch embedding. +This document defines the `phenotype_recommendation` design in the ACP + MCP architecture. The MCP service owns the phenotype index on local disk and exposes read-only retrieval tools. ACP orchestrates retrieval, multiple LLM calls, deterministic shortlist enforcement, and final response assembly. 
Core remains pure and deterministic for schema validation and final filtering. + + +```mermaid +sequenceDiagram + autonumber + actor U as User / Client + participant A as ACP HTTP Server + participant G as ACP StudyAgent + participant M as MCP Server + participant L as LLM + participant C as Core Validators + + U->>A: POST /flows/phenotype_recommendation
{study_intent, top_k, max_results, candidate_limit} + A->>G: run_phenotype_recommendation_flow(...) + + Note over G: Stage 1: Candidate retrieval + G->>M: phenotype_search(query=study_intent, top_k, offset?) + M-->>G: search results + scores
(dense/sparse hybrid retrieval) + + Note over G: Stage 2: Intent-facet extraction + G->>M: phenotype_prompt_bundle(task="phenotype_recommendation_intent_facets") + M-->>G: overview + spec + output_schema + G->>L: intent facets prompt
(study_intent only) + L-->>G: {plan, intent_facets, reasoning_notes} + G->>G: normalize intent facets and aliases + + Note over G: Stage 3: Planning shortlist + G->>M: phenotype_fetch_summary(...) x N + M-->>G: hydrated candidate summaries + G->>G: rerank planning candidates using intent facets + metadata + G->>M: phenotype_prompt_bundle(task="phenotype_recommendation_plan") + M-->>G: overview + spec + output_schema + G->>L: planning prompt
(study_intent + planner candidate band) + L-->>G: {plan, shortlist_ids, needs_more_search, reasoning_notes} + G->>C: phenotype_recommendation_plan(...) + C-->>G: validated planning payload + G->>G: enforce shortlist deterministically
block unsafe rows, dedupe, suppress weak filler + G->>M: phenotype_fetch_summary(...) x shortlist + M-->>G: hydrated shortlist rows + G->>G: rebuild planning reasoning_notes from enforced shortlist + + Note over G: Stage 4: Final recommendation text + G->>M: phenotype_prompt_bundle(task="phenotype_recommendations") + M-->>G: overview + spec + output_schema + G->>L: final recommendation prompt
(study_intent + final compact candidates) + L-->>G: {plan, phenotype_recommendations} + + Note over G: Stage 5: Deterministic finalization + G->>G: validate LLM payload against shortlist/catalog + G->>G: build deterministic final payload
ids come from enforced shortlist, LLM supplies usable justifications + G->>C: phenotype_recommendations(...) + C-->>G: final grounded response + + G-->>A: {status, search, intent_facets, planning,
recommendations, diagnostics} + A-->>U: HTTP 200 JSON +``` + +A compact swimlane version showing the main split of responsibilities: + +```mermaid +flowchart LR + subgraph User + U1[Submit study intent] + end + + subgraph ACP + A1[HTTP flow handler] + A2[Run phenotype recommendation flow] + A3[Intent facet extraction] + A4[Planning rerank and shortlist enforcement] + A5[Final deterministic recommendation assembly] + end + + subgraph MCP + M1[phenotype_search] + M2[phenotype_fetch_summary] + M3[phenotype_prompt_bundle] + end + + subgraph LLM + L1[Intent facets call] + L2[Planning call] + L3[Final recommendation call] + end + + subgraph Core + C1[Plan validator] + C2[Final recommendation validator] + end + + U1 --> A1 + A1 --> A2 + A2 --> M1 + M1 --> A3 + A3 --> M3 + M3 --> L1 + L1 --> A3 + A3 --> M2 + M2 --> A4 + A4 --> M3 + M3 --> L2 + L2 --> C1 + C1 --> A4 + A4 --> M2 + M2 --> A5 + A5 --> M3 + M3 --> L3 + L3 --> A5 + A5 --> C2 + C2 --> A1 + A1 --> U1 +``` + + +A few implementation notes worth keeping in mind: +- MCP does not perform the final ranking; it only retrieves candidates and prompt assets. +- ACP now owns the actual decision logic: reranking, blocking, shortlist enforcement, dedupe, deterministic final ids. +- The final LLM call is advisory for text/justification, not for unconstrained id selection.
diff --git a/docs/SERVICE_REGISTRY.yaml b/docs/SERVICE_REGISTRY.yaml index df518c6..6ddd02e 100644 --- a/docs/SERVICE_REGISTRY.yaml +++ b/docs/SERVICE_REGISTRY.yaml @@ -108,10 +108,21 @@ services: - mode - diagnostics validation: - - fail-closed sanitization before any LLM call - - only observed candidate_items may appear in structured ranking output - - index_event is never eligible for structured ranking - - optional enrichment tools are additive and not required for successful execution + rules: + - fail-closed sanitization before any LLM call + - only observed candidate_items may appear in structured ranking output + - index_event is never eligible for structured ranking + - optional enrichment tools are additive and not required for successful execution + controlled_identifier_keys: + - ingred_rxcui + - rxcui + - adverse_event_meddra_id + - meddra_code + - meddra_id + - concept_id + - ingredient_concept_id + - adverse_event_concept_id + - outcome_concept_id keeper_concept_sets_generate: endpoint: /flows/keeper_concept_sets_generate @@ -191,3 +202,37 @@ services: - questions validation: - core.phenotype_intent_split schema validation + + cohort_methods_intent_split: + endpoint: /flows/cohort_methods_intent_split + mcp_tools: + - cohort_methods_intent_split + input: + - study_intent + output: + - target_statement + - comparator_statement + - outcome_statement + - rationale + - questions + validation: + - core.cohort_methods_intent_split schema validation + + cohort_methods_specifications_recommendation: + endpoint: /flows/cohort_methods_specifications_recommendation + mcp_tools: + - cohort_methods_prompt_bundle + input: + - analytic_settings_description + - study_description + - study_intent + output: + - status + - recommendation + - cohort_methods_specifications + - section_rationales + - diagnostics + validation: + - core.validate_cohort_methods_spec top-level validation + - core.validate_section with default backfill for invalid sections + - 
core.cohort_methods_spec_to_shell_recommendation projection diff --git a/docs/STRATEGUS_COHORT_METHODS_SHELL.md b/docs/STRATEGUS_COHORT_METHODS_SHELL.md new file mode 100644 index 0000000..eff9c89 --- /dev/null +++ b/docs/STRATEGUS_COHORT_METHODS_SHELL.md @@ -0,0 +1,258 @@ +# Strategus Cohort Methods Shell + +Current stage scope: + +- Cohort methods shell with ACP-assisted intent split and phenotype recommendation. +- The shell can derive target/comparator/outcome statements from a study intent. +- The shell can configure one effective analytic-settings profile through `step_by_step` prompts or `free_text` ACP recommendation. +- The shell writes reproducible R scripts, a Strategus analysis specification, and a merged CohortMethod execution script. + +This shell is provided as `OHDSIAssistant::runStrategusCohortMethodsShell()`. + +## Running + +Usage examples for `OHDSIAssistant::runStrategusCohortMethodsShell()` live in the R package README: `R/OHDSIAssistant/README.md`. + +Workflow diagrams live in `docs/COHORT_METHODS_WORKFLOW.md`. + +## Current Stage Flow + +1. Manual collection of required identifiers: + - `studyIntent` +2. ACP-assisted split of `studyIntent` into: + - `targetStatement` + - `comparatorStatement` + - one or more outcome statements (`outcomeStatement` remains the primary/first outcome for compatibility) + - when multiple outcome statements are suggested interactively, choose the subset to keep or enter none/0 to provide a manual outcome before editing or adding statements +3. Role-specific phenotype recommendation / cache reuse for target, comparator, and outcome cohorts. + Interactive runs ask for short analysis labels for selected cohorts and the comparison; labels must + be 50 characters or fewer because downstream Strategus/Characterization result tables use short + identifier fields. +4. Optional cohort ID remap step to avoid collisions (`remapCohortIds`). +5. 
Copy cohort JSON definitions from `indexDir/definitions` into selected cohort folders. +6. Optional negative control and covariate concept-set IDs are captured as placeholders. +7. Configure one analytic-settings profile through `step_by_step`, `free_text`, or cached/function-argument inputs. + Analytic settings are always collected in this stage and confirmed before finalization. +8. Generate scripts in `scripts/` for cohort generation, keeper review, diagnostics, and + CohortMethod spec/execution. + +## Analytic Settings + +The cohort methods shell now resolves a single effective analytic-settings profile. This remains +prompt/cache/free-text-driven only; there is no public function argument that accepts a complete +analytic-settings object in this stage. + +Supported configuration modes: + +- `step_by_step` +- `free_text` + +At a high level: + +- `step_by_step` covers study population, time-at-risk, propensity score adjustment, and outcome model settings. +- `free_text` uses an ACP recommendation when available and falls back to a local stub if ACP is unavailable. +- Persisted JSON keeps the existing `CohortMethod`-aligned field names. + +The effective selected profile is written to `outputs/cm_analysis_defaults.json`, which retains +profile metadata such as `profile_name`, `source`, and `customized_sections`. The generated +`scripts/06_cm_spec.R` combines those defaults with `outputs/cm_comparisons.json` and the selected +cohort definitions to create a Strategus analysis specification. + +For traceability: + +- `outputs/manual_inputs.json` stores the effective `analytic_settings` block plus the + `customized_sections` array, the selected analytic-settings mode, and any free-text metadata. +- `outputs/cm_analytic_settings_recommendation.json` is written only for `free_text` mode in the + current stage. It stores the shell-facing recommendation derived from the ACP response or, if ACP + is unavailable, from the local fallback. 
+- `outputs/cm_acp_specifications_recommendation.json` is written for `free_text` mode and stores + the ACP flow request/response wrapper used to derive the shell-facing recommendation. +- `outputs/study_agent_state.json` echoes `analytic_settings_profile_name` and + `analytic_settings_customized_sections`, plus analytic-settings mode / confirmation summary. +- `analysis-settings/cmAnalysis.json` stores the template-shaped CohortMethod-oriented contract + artifact. The generated `06_cm_spec.R` currently still reads `outputs/cm_analysis_defaults.json` + as its execution settings source. + +## Output Layout + +The following directories are created under `outputDir`: + +- `outputs/` +- `selected-cohorts/` +- `selected-target-cohorts/` +- `selected-comparator-cohorts/` +- `selected-outcome-cohorts/` +- `patched-cohorts/` +- `patched-target-cohorts/` +- `patched-comparator-cohorts/` +- `patched-outcome-cohorts/` +- `keeper-case-review/` +- `concept-sets/` +- `analysis-settings/` +- `scripts/` +- `cm-results/` +- `cm-diagnostics/` +- `cm-data/` + +### `outputs/` artifacts + +- `manual_intent.json` +- `manual_inputs.json` +- `cohort_methods_intent_split.json` +- `cohort_id_map.json` +- `cohort_roles.json` +- `recommendations_target.json` +- `recommendations_comparator.json` +- `recommendations_outcome.json` +- `recommendations_outcome_.json` (when multiple outcome statements are recommended separately) +- `cm_comparisons.json` +- `cm_analysis_defaults.json` +- `cm_acp_specifications_recommendation.json` (free-text mode only) +- `cm_analytic_settings_recommendation.json` (free-text mode only) +- `cm_concept_set_selections.json` +- `improvements_target.json` +- `improvements_comparator.json` +- `improvements_outcome.json` +- `improvements_status.json` +- `cm_evaluation_todo.json` +- `cm_analysis_state.json` (written by `scripts/06_cm_spec.R`) +- `study_agent_state.json` + +`cm_analysis_defaults.json` stores the effective analytic-settings profile used by the generated 
+`06_cm_spec.R` when projecting shell settings into Strategus module specifications. + +`manual_inputs.json` is the cache/resume-friendly shell artifact for the same run. It includes the +effective `analytic_settings` object plus `customized_sections`. + +`06_cm_spec.R` reads the expanded analytic-settings schema and uses it directly when constructing +the CohortMethod module settings: + +- `getDbCohortMethodDataArgs` +- `createStudyPopulationArgs` +- `createPsArgs` +- `trimByPsArgs` +- `matchOnPsArgs` +- `stratifyByPsArgs` +- `fitOutcomeModelArgs` + +It also writes `analysis-settings/analysisSpecification.json`, a Strategus specification containing: + +- a shared cohort-definition resource +- `CharacterizationModule` +- `CohortIncidenceModule` +- `CohortMethodModule` + +The generated script uses `CohortGeneratorModule$new()` only to create the shared cohort-definition +resource. The generated Strategus specification intentionally does not add a cohort-generation module +specification or a `CohortDiagnosticsModule` specification because cohort generation and diagnostics +are handled by `03_generate_cohorts.R` and `05_diagnostics.R`. + +`scripts/06_cm_spec.R` also writes: + +- `outputs/cm_analysis_state.json` +- `analysis-settings/strategus_execute_result.rds` + +The same `06_cm_spec.R` script then executes the just-created specification with +`Strategus::execute()`. There is no separate `07_cm_run_analyses.R` in the merged Strategus +CohortMethod flow. + +## Generated Scripts + +- `scripts/02_apply_improvements.R` +- `scripts/03_generate_cohorts.R` +- `scripts/04_keeper_review.R` +- `scripts/05_diagnostics.R` +- `scripts/06_cm_spec.R` + +Generated scripts that connect to the database expect these site-specific files at the root of +`outputDir`: + +- `strategus-db-details.json` +- `strategus-execution-settings.json` + +The scripts still contain placeholders for values that are not captured in those files yet, such as +`databaseId` for Keeper/export steps. 
+ +## Current Boundaries + +- `phenotype_improvements` is wired for target, comparator, and outcome cohorts. The shell writes + role-specific improvement artifacts after prompting whether to run improvements for each role, + can apply mutating actions (`set`, `replace`, `update`), keeps advisory `note` actions as + recommendations, and keeps `patched-cohorts/` complete for downstream scripts when any mutating + improvement is applied. +- Remaining deferred integration points: + - comparator reuse lookup + - phenotype index search for suggestion workflows +- Atlas settings deferred in this stage: + - negative control cohort-definition logic + - positive control synthesis + - empirical calibration configuration + - detailed covariate feature-group selection beyond the current default-plus-include/exclude model +- TODO: implement ACP/MCP support for negative control and covariate concept-set workflows, then + update the shell to use those tools instead of writing dummy placeholder concept-set artifacts. +- Covariate concept-set include/exclude is not fully implemented yet. Because the generated + CohortMethod scripts cannot currently materialize exclude covariate concepts, high-correlation + covariates may remain in the model and cause `06_cm_spec.R` to fail when + `errorOnHighCorrelation` is enabled. +- Atlas / CohortMethod settings partially supported but still needing broader validation: + - `minDaysAtRisk` + - PS trimming (`none`, percent trimming, and equipoise bounds) + - `inversePtWeighting` passed through to `fitOutcomeModelArgs` +- Evaluation settings from section 12.7.3 remain deferred as well. +- Multiple analytic-settings profiles, multi-comparison support, and broader CohortMethod branching + remain for a later stage. +- Script TODO comments document where these extensions are expected. 
+ +## Notes + +- This stage is designed as a bridge: it combines ACP/MCP-assisted intent split, phenotype + recommendation/improvement, and analytic-settings recommendation with reproducible Strategus + script generation. + +## Analytic Settings Prompt Details + +Current `step_by_step` section flow: + +- `study_population` +- `time_at_risk` +- `propensity_score_adjustment` +- `outcome_model` + +User-facing `step_by_step` prompts follow the ATLAS section grouping: + +- ask only the section's core settings directly +- then offer a keep-defaults step for the remaining hidden/default settings +- if the user declines defaults, ask each remaining exposed setting one by one +- show short setting names only in default summaries and final summaries +- show detailed per-setting descriptions only in the one-by-one customization path + +Exception for propensity score adjustment: + +- first ask the strategy: `match_on_ps`, `stratify_by_ps`, or `none` +- if `match_on_ps`, ask only `maxRatio` +- if `stratify_by_ps`, ask only `numberOfStrata` +- after that, show the remaining PS defaults and ask whether to keep them +- if the user declines defaults, ask the exposed remaining PS settings one by one +- exposed PS trimming settings in that remaining-defaults path: `trimmingStrategy` with `none`, `by_percent`, or `by_equipoise` +- if trimming is customized to `by_percent`, the shell asks for the trimming percent +- if trimming is customized to `by_equipoise`, the shell asks for the lower and upper equipoise bounds +- match defaults currently exposed in that remaining-defaults path: `maxCohortSizeForFitting`, `errorOnHighCorrelation`, `useRegularization`, `caliper`, `caliperScale` +- stratify defaults currently exposed in that remaining-defaults path: `maxCohortSizeForFitting`, `errorOnHighCorrelation`, `useRegularization`, `baseSelection` +- hidden internal defaults such as `create_ps.estimator` still remain persisted but are not directly prompted + +Current execution defaults and 
persisted artifacts use these effective analytic-settings fields: + +- `profile_name` +- Study population: `studyStartDate`, `studyEndDate`, `maxCohortSize`, `firstExposureOnly`, `washoutPeriod`, `restrictToCommonPeriod`, `removeDuplicateSubjects`, `censorAtNewRiskWindow`, `removeSubjectsWithPriorOutcome`, `priorOutcomeLookback` +- Covariate settings: current default covariate behavior, include-all state, include concept-set selection, exclude concept-set selection +- Time-at-risk: `minDaysAtRisk`, `riskWindowStart`, `startAnchor`, `riskWindowEnd`, `endAnchor` +- Propensity score adjustment: `strategy`, `trimmingStrategy`, `trimmingPercent`, `equipoiseLowerBound`, `equipoiseUpperBound`, `estimator`, `maxCohortSizeForFitting`, `errorOnHighCorrelation`, `useRegularization`, matching `caliper`, matching `caliperScale`, matching `maxRatio`, stratification `numberOfStrata`, stratification `baseSelection` +- Outcome model: `modelType`, `stratified`, `useCovariates`, `inversePtWeighting`, `useRegularization` + +Important current default behavior: + +- Matching defaults follow CohortMethod defaults, including `maxRatio = 1`, `caliper = 0.2`, and `caliperScale = "standardized logit"`. +- PS fitting defaults exposed in the shell include `maxCohortSizeForFitting = 250000`, `errorOnHighCorrelation = FALSE`, and `useRegularization = TRUE`. +- PS trimming defaults exposed in the shell include `trimmingStrategy = none`, `trimmingPercent = 5`, and equipoise bounds `c(0.25, 0.75)`. +- Time-at-risk defaults follow CohortMethod defaults, including `riskWindowStart = 0` and `censorAtNewRiskWindow = FALSE`. +- Outcome-model defaults are partially dynamic: `stratified = FALSE` for no PS adjustment or one-to-one matching, `stratified = TRUE` for variable-ratio matching and PS stratification, `useCovariates = FALSE`, `inversePtWeighting = FALSE`, and `useRegularization = TRUE`. 
diff --git a/docs/TESTING.md b/docs/TESTING.md index dcaae86..0771b95 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -285,6 +285,7 @@ curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ Expected recommendation responses now include `llm_used`, `llm_status`, `fallback_reason`, `fallback_mode`, and `diagnostics`. If the LLM path fails to parse or validate, ACP still returns `status: ok` with an explicit machine-readable fallback reason instead of silently degrading. + ## Timeout calibration Use the automated calibration task to derive environment-specific starting values for `EMBED_TIMEOUT`, `STUDY_AGENT_MCP_TIMEOUT`, `LLM_TIMEOUT`, and `ACP_TIMEOUT`: @@ -344,6 +345,64 @@ Invoke-RestMethod ` -TimeoutSec 180 ``` +Cohort methods intent split (target/comparator/outcome statements): + +```bash +curl -s -X POST http://127.0.0.1:8765/flows/cohort_methods_intent_split \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?"}' +``` + +PowerShell (Windows) equivalent: + +```powershell +$body = @{ + study_intent = "What is the risk of angioedema or acute myocardial infarction in new users of ACE inhibitors compared to new users of thiazide and thiazide-like diuretics?" +} | ConvertTo-Json + +Invoke-RestMethod ` + -Method Post ` + -Uri http://127.0.0.1:8765/flows/cohort_methods_intent_split ` + -Headers @{ "Content-Type" = "application/json" } ` + -Body $body ` + -TimeoutSec 180 +``` + +Cohort methods specifications recommendation (analytic settings): + +```bash +curl -s -X POST http://127.0.0.1:8765/flows/cohort_methods_specifications_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"analytic_settings_description":"Compare sitagliptin new users vs glipizide new users for acute myocardial infarction. 
Use a 365-day washout, intent-to-treat follow-up, 1:1 propensity score matching on standardized logit with a caliper of 0.2, and a Cox model.","study_intent":"Comparative effectiveness study on CV outcomes."}' | python -m json.tool +``` + +PowerShell (Windows) equivalent: + +```powershell +$body = @{ + analytic_settings_description = "Compare sitagliptin new users vs glipizide new users for acute myocardial infarction. Use a 365-day washout, intent-to-treat follow-up, 1:1 propensity score matching on standardized logit with a caliper of 0.2, and a Cox model." + study_intent = "Comparative effectiveness study on CV outcomes." +} | ConvertTo-Json + +Invoke-RestMethod ` + -Method Post ` + -Uri http://127.0.0.1:8765/flows/cohort_methods_specifications_recommendation ` + -Headers @{ "Content-Type" = "application/json" } ` + -Body $body ` + -TimeoutSec 240 +``` + +Expected responses include `status`, `recommendation`, `cohort_methods_specifications`, `section_rationales`, and `diagnostics`. Valid top-level statuses are `ok`, `schema_validation_error`, and `llm_parse_error`; parse or section validation failures should return a backfilled recommendation with diagnostics rather than an unstructured response. + +For local non-live coverage of the route, input model, validation, and mocked ACP flow: + +```bash +pytest tests/test_acp_cohort_methods_route.py \ + tests/test_cohort_methods_specs_models.py \ + tests/test_cohort_methods_spec_validation.py \ + tests/test_acp_cohort_methods_flow.py +``` + ## ACP flow examples (MCP-backed) Phenotype improvements: @@ -993,6 +1052,27 @@ doit smoke_concept_sets_review_flow doit smoke_cohort_critique_flow ``` +## Cohort methods specifications recommendation smoke test + +This live ACP + MCP smoke test requires LLM credentials, because the flow asks the LLM to map free-text cohort-method analytic settings into the CohortMethod specification shape: + +```bash +export LLM_API_KEY="..." 
+doit smoke_cohort_methods_specs_recommend_flow +``` + +If you want `doit` to start MCP over HTTP automatically, use the same managed MCP settings as the phenotype flow smoke test: + +```bash +export STUDY_AGENT_MCP_URL="http://127.0.0.1:8790/mcp" +export STUDY_AGENT_MCP_MANAGED=1 +export MCP_START_TIMEOUT=3 +export LLM_API_KEY="..." +doit smoke_cohort_methods_specs_recommend_flow +``` + +The smoke test posts to `/flows/cohort_methods_specifications_recommendation` and checks that the response status is one of `ok`, `schema_validation_error`, or `llm_parse_error`, and that `recommendation.raw_description` is present. + ## Phenotype validation review smoke test ```bash diff --git a/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-evaluation.md b/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-evaluation.md new file mode 100644 index 0000000..4e365fc --- /dev/null +++ b/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-evaluation.md @@ -0,0 +1,91 @@ +# Phenotype Recommendation Sparse Index Evaluation + +## Objective +This evaluation assessed two related questions for the phenotype recommendation workflow. First, we wanted to confirm that recent cleanup and refactoring work did not introduce regressions in phenotype recommendation behavior. Second, we wanted to estimate whether the sparse retrieval index is contributing enough value to justify its build cost, especially for VA/CIPHER-oriented phenotypes and long-tail phenotype titles. + +The comparison was motivated by the current implementation state of the phenotype recommendation flow after a series of ranking, shortlist-enforcement, prompt, and refactor changes. The practical decision point for this sprint was whether the team could simplify retrieval by reducing or removing sparse weighting without materially degrading shortlist quality. 
+ +## Methods +A fixed multi-case phenotype recommendation batch was executed repeatedly through the ACP flow using the local `phenotype_recommendation` endpoint. The test set included mixed OHDSI and CIPHER titles, including diagnosis phenotypes, procedure phenotypes, medication exposure phenotypes, shorthand and acronym cases, VA/CIPHER-skewed phenotypes, and previously identified control cases such as AAA, COPD, ADRD, GI bleed, heart failure hospitalization, and COVID outpatient diagnosis. The titles were sampled randomly from the titles of the phenotype definitions, which were then converted to study intent statements automatically with human review. + +The same batch was run under three retrieval weighting configurations: dense/sparse `0.7 / 0.3` as the working baseline, dense/sparse `1.0 / 0.0` as the dense-only condition, and dense/sparse `0.8 / 0.2` as an intermediate condition. For each run, heredoc summaries were reviewed with attention to `planning.shortlist_ids`, final recommendation ids, and visible rerank evidence. Evaluation artifacts were preserved in this folder, including the shell script used to execute the batch and the full text outputs from each weighting condition. + +## Results +The dense-only condition (`1.0 / 0.0`) produced large behavioral changes relative to the baseline. Across 71 evaluated cases, 49 cases changed in shortlist and/or final recommendation ids. Several of those changes were clear regressions, including loss of strong CIPHER-heavy results for abdominal aortic aneurysm in veterans, degradation of multiple VA/CIPHER-oriented chronic disease phenotypes, and drift toward weaker generic OHDSI neighbors in reviewer-facing cases such as cardiac complications, renal sclerosis, developmental disorders, and aortic valve disease. Dense-only also caused some known-risk cases to improve, notably rifamycin exposure and some GI bleed outputs, but the regressions were broader and more consequential than the gains. 
+ +The intermediate condition (`0.8 / 0.2`) behaved as a partial compromise but still changed 39 of 71 cases relative to the `0.7 / 0.3` baseline. It preserved some sparse-dependent wins that dense-only had lost, including stable outputs for fasciitis, keloid scars, COPD, and developmental disorders, and it retained the improvement on rifamycin exposure. However, it still regressed several important VA/CIPHER-facing cases relative to baseline, including renal sclerosis, veteran cardiac complications, and some reviewer-oriented long-tail phenotypes. It also did not fully resolve medication-exposure precision problems. + +## Discussion +The main conclusion from this evaluation is that the sparse index is currently contributing meaningful retrieval value and should be retained for the phenotype recommendation workflow. The evidence is not merely that some individual recommendations changed, but that removing sparse weighting altered a large fraction of the batch and materially weakened several cases that are important for CIPHER-oriented human review. In particular, the long-tail VA/CIPHER slice appears sensitive to sparse support in a way that dense-only retrieval does not currently reproduce. + +For the current sprint, the baseline weighting of dense/sparse `0.7 / 0.3` remains the best operational choice. The `0.8 / 0.2` condition is informative and may be worth revisiting in a future tuning pass, but it does not yet offer a clear enough improvement to justify changing the default immediately before human evaluation. The dense-only condition should not be adopted at present. Future follow-up work, if needed, should focus on targeted holdout cases such as medication exposure precision and selected context-heavy phenotypes rather than broad retrieval simplification before review. + +### Next steps + +We have listed below a set of study intent statements that should be helpful for a first round human review. 
+ +(TODO: revise to ensure variability of incidence, diagnostic difficulty (e.g., length of differential diagnoses with less than negligible incidence), ...) + +**Human test cases** +1. `Veteran patients with renal sclerosis` +2. `Veteran patients with polymyalgia rheumatica` +3. `Veteran patients with autoimmune hemolytic anemia` +4. `Patients diagnosed with fasciitis` +5. `Patients with stomatitis or mucositis` +6. `Patients with Barretts esophagus` +7. `Patients with regional enteritis` +8. `Patients with chronic periodontitis` +9. `Patients with scleritis or episcleritis` +10. `veterans who experienced an abdominal aortic aneurysm` +11. `patients with COPD according to diagnostic codes in the EHR` +12. `patients who experienced a GI bleed adverse event` +13. `older adults with a likely diagnosis of ADRD or late-stage dementia` +14. `patients hospitalized at least once for heart failure` + +**Holdout Cases** +Keep these out of the main reviewer round and track them separately for internal analysis. +- `patients who received a COVID-19 diagnosis in the outpatient setting` +- `Patients with MSI-low rectal adenocarcinoma` +- `patients with a drug exposure to acetaminophen in the hospital setting` +- `patients exposed to rifamycin antibiotics` +- `Patients with a urinary tract infection who are new users of cephalosporins` +- `Patients hospitalized with preinfarction syndrome` + +**Compact Scoring Sheet** +Have each reviewer score each case on 3 axes. 
+ +- `Top recommendation quality` + - `2` = clearly appropriate + - `1` = plausible but not ideal + - `0` = inappropriate + +- `Overall shortlist quality` + - `2` = all returned recommendations are defensible + - `1` = one weaker but acceptable extra result + - `0` = one or more clearly off-target results + +- `Usefulness for study planning` + - `2` = would confidently use as a starting point + - `1` = useful with manual review + - `0` = not useful + +Optional binary flags: +- `Too broad` +- `Too narrow` +- `Wrong role` +- `Wrong care setting` +- `Duplicate/near-duplicate` +- `Missing obvious better phenotype` + +**Simple Reviewer Table** +Use one row per case: + +```text +Case | Query | Top quality (0-2) | Shortlist quality (0-2) | Usefulness (0-2) | Flags | Notes +``` + +**Suggested Success Threshold** +For the main reviewer batch, I’d treat this as ready if: +- most cases score `2` on top recommendation quality +- few or no cases score `0` on overall shortlist quality +- average usefulness is at least `1.5` diff --git a/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-test-cases.sh b/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-test-cases.sh new file mode 100755 index 0000000..a5c0b69 --- /dev/null +++ b/docs/evaluation/phenotype_recommendations/phenotype-recommendation-sparse-index-test-cases.sh @@ -0,0 +1,577 @@ +#!/bin/bash + +## Example random sample approach +# # Cipher +# for i in data/phenotype_index_cipher_omop/definitions/cipher_*; do cat $i | jq '.fullName' >> /tmp/fulnames.txt ; done; +# shuf -n 50 /tmp/fulnames.txt +# "Other disorders of carbohydrate transport and metabolism (MAP)" +# "Dyschromia and Vitiligo (gwPheWAS)" +# "Nerve Plexus Lesions (Phecode)" +# "Ulcerative colitis (chronic) (gwPheWAS)" +# "Pervasive Developmental Disorders (Phecode)" +# # OHDSI +# cut -d, -f2 data/Cohorts.csv | shuf -n 5 +# "[P] acetaminophen exposure 10" +# "[P] Acute Hepatic Injury with no pre-existing 
liver disease" +# "[P] Posterior reversible encephalopathy syndrome PRES" +# "[P][R] Acute myocardial infarction" +# "[P] Antiphospholipid syndrome" +################################################################################ + +# **From below, you can uncomment/comment out commands to create an executable human reviewer Batch** + +# 1. `Veteran patients with renal sclerosis` +# 2. `Veteran patients with polymyalgia rheumatica` +# 3. `Veteran patients with autoimmune hemolytic anemia` +# 4. `Patients diagnosed with fasciitis` +# 5. `Patients with stomatitis or mucositis` +# 6. `Patients with Barretts esophagus` +# 7. `Patients with regional enteritis` +# 8. `Patients with chronic periodontitis` +# 9. `Patients with scleritis or episcleritis` +# 10. `veterans who experienced an abdominal aortic aneurysm` +# 11. `patients with COPD according to diagnostic codes in the EHR` +# 12. `patients who experienced a GI bleed adverse event` +# 13. `older adults with a likely diagnosis of ADRD or late-stage dementia` +# 14. `patients hospitalized at least once for heart failure` + +# **Holdout Cases** +# Keep these out of the main reviewer round and track them separately for internal analysis. +# - `patients who received a COVID-19 diagnosis in the outpatient setting` +# - `Patients with MSI-low rectal adenocarcinoma` +# - `patients with a drug exposure to acetaminophen in the hospital setting` +# - `patients exposed to rifamycin antibiotics` +# - `Patients with a urinary tract infection who are new users of cephalosporins` +# - `Patients hospitalized with preinfarction syndrome` + +# **Compact Scoring Sheet** +# Have each reviewer score each case on 3 axes. 
+ +# - `Top recommendation quality` +# - `2` = clearly appropriate +# - `1` = plausible but not ideal +# - `0` = inappropriate + +# - `Overall shortlist quality` +# - `2` = all returned recommendations are defensible +# - `1` = one weaker but acceptable extra result +# - `0` = one or more clearly off-target results + +# - `Usefulness for study planning` +# - `2` = would confidently use as a starting point +# - `1` = useful with manual review +# - `0` = not useful + +# Optional binary flags: +# - `Too broad` +# - `Too narrow` +# - `Wrong role` +# - `Wrong care setting` +# - `Duplicate/near-duplicate` +# - `Missing obvious better phenotype` + +# **Simple Reviewer Table** +# Use one row per case: + +# ```text +# Case | Query | Top quality (0-2) | Shortlist quality (0-2) | Usefulness (0-2) | Flags | Notes +# ``` + +# **Suggested Success Threshold** +# For the main reviewer batch, I’d treat this as ready if: +# - most cases score `2` on top recommendation quality +# - few or no cases score `0` on overall shortlist quality +# - average usefulness is at least `1.5` + +################################################################################ + +## After this runs, you can inspect a compact summary of the results using this: + +OUTPUT_FILE="/tmp/phenotype_recommendation_tests.json" + +echo "INFO: creating output file " ${OUTPUT_FILE} +echo '{"results":[' > ${OUTPUT_FILE} + +### + +echo "INFO: Cardiac defibrillator in situ (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with an implanted cardiac defibrillator", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Fasciitis (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients diagnosed with fasciitis", "top_k":20, "max_results":3, 
"candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Acute prostatitis (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with acute prostatitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Esophagectomy" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients who underwent esophagectomy", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P][R] Peripheral neuritis" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients diagnosed with peripheral neuritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P][R] Allergic rhinitis" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with allergic rhinitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Ischemic Heart Disease (Sandhu)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 
'Content-Type: application/json' \ + -d '{"study_intent":"Patients with ischemic heart disease", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Pregnant patients with hemorrhage in early pregnancy or threatened labor", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Lung Resection" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients who underwent lung resection", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Laryngitis" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with laryngitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Regional Enteritis (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with regional enteritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Renal Sclerosis NOS (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with renal sclerosis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Other cardiomyopathy (MAP)" +curl -s -X POST 
http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with cardiomyopathy", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Posterior reversible encephalopathy syndrome PRES" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with a diagnosis of PRES", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Anorexia Nervosa" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with anorexia nervosa", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Dizziness or giddiness including motion sickness and vertigo" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with dizziness, vertigo, or motion sickness", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Polymyalgia Rheumatica (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with polymyalgia rheumatica", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with adverse effects from therapeutic corticosteroid use", "top_k":20, 
"max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P][R] Low blood pressure" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with low blood pressure", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Encephalopathy" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with encephalopathy", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Birdshot chorioretinitis" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with birdshot chorioretinitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Macular Degeneration (Senile) of Retina Nos (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Older adults with macular degeneration", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Autoimmune Hemolytic Anemias (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with autoimmune hemolytic anemia", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Primary adenocarcinoma of rectum MSI-L" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d 
'{"study_intent":"Patients with MSI-low rectal adenocarcinoma", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Blister (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with blistering skin lesions", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Stomatitis and mucositis (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with stomatitis or mucositis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Neurofibromatosis type 1 (FP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with neurofibromatosis type 1", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Keloid scar (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with keloid scars", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] acetaminophen exposure 10" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with acetaminophen exposure", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Antibiotics Rifamycins 10" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 
'Content-Type: application/json' \ + -d '{"study_intent":"Patients exposed to rifamycin antibiotics", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Joint/ligament sprain (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with a joint or ligament sprain", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Miscarriage; stillbirth (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Pregnant patients with miscarriage or stillbirth", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Arterial embolism and thrombosis of lower extremity artery (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with arterial embolism or thrombosis of a lower extremity artery", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with a urinary tract infection who are new users of cephalosporins", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] Hospitalization with preinfarction syndrome" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients hospitalized with preinfarction 
syndrome", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Personal history of diseases of blood and blood-forming organs (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with a personal history of blood or blood-forming organ disease", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Other Benign Pancreatic Conditions (Nguyen)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with benign pancreatic conditions", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Osteoarthrosis Localized Primary (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with primary localized osteoarthritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: [P] New users of dihydropyridine calcium channel blockers" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"New users of dihydropyridine calcium channel blockers", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Renal Sclerosis NOS (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Veteran patients with renal sclerosis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo 
"INFO: Polymyalgia Rheumatica (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Veteran patients with polymyalgia rheumatica", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Autoimmune Hemolytic Anemias (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Veteran patients with autoimmune hemolytic anemia", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Cardiac Complications Not Elsewhere Classified (VADC)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Veteran patients with cardiac complications", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Fasciitis (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients diagnosed with fasciitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Stomatitis and mucositis (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with stomatitis or mucositis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Barrett's esophagus (gwPheWAS)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with Barretts esophagus", "top_k":20, "max_results":3, 
"candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Regional Enteritis (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with regional enteritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Osteoarthrosis Localized Primary (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with primary localized osteoarthritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Aortic Valve Disease (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with aortic valve disease", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Chronic Periodontitis (Phecode)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with chronic periodontitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Hypertensive chronic kidney disease (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d '{"study_intent":"Patients with hypertensive chronic kidney disease", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE} +echo "," >> ${OUTPUT_FILE} + +echo "INFO: Other cardiomyopathy (MAP)" +curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \ + -H 'Content-Type: application/json' \ + -d 
'{"study_intent":"Patients with cardiomyopathy", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Scleritis and episcleritis (MAP)"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation \
+  -H 'Content-Type: application/json' \
+  -d '{"study_intent":"Patients with scleritis or episcleritis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+
+echo "INFO: Other disorders of carbohydrate transport and metabolism (MAP)"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"Patients with a carbohydrate transport and metabolism disorder", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: [P] acetaminophen exposure 10"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients with a drug exposure to acetaminophen in the hospital setting", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Dyschromia and Vitiligo"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"Patients diagnosed with dyschromia and vitiligo", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Acute Hepatic Injury with no pre-existing liver disease"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury ", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Nerve Plexus Lesions"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"A PheCode-based definition of patients with nerve plexus lesions", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Posterior reversible encephalopathy syndrome PRES"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients with a diagnosis of PRES", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Ulcerative colitis (chronic)"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients with chronic ulcerative colitis", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Pervasive Developmental Disorders"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"Veteran patients with developmental disorders that are pervasive", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Acute myocardial infarction"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients with at least 2 recorded diagnoses of acute myocardial infarction", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: Antiphospholipid syndrome"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+
+echo "INFO: dementia in older adults"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"older adults with a likely diagnosis of ADRD or late-stage dementia", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+echo "," >> ${OUTPUT_FILE}
+
+
+echo "INFO: GI bleeding adverse event outcome"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients who experienced a GI bleed adverse event", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+echo "," >> ${OUTPUT_FILE}
+
+
+echo "INFO: running COVID outpatient diagnosis cohort"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients who received a COVID-19 diagnosis in the outpatient setting", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: running abdominal aortic aneurysm in veterans"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"veterans who experienced an abdominal aortic aneurysm ", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: COPD phenotype using diagnosis codes"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients with COPD according to diagnostic codes in the EHR", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: heart failure hospitalization cohort"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients hospitalized at least once for heart failure", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+echo "," >> ${OUTPUT_FILE}
+
+echo "INFO: diabetes medication-based phenotype"
+curl -s -X POST http://127.0.0.1:8765/flows/phenotype_recommendation -H 'Content-Type: application/json' -d '{"study_intent":"patients who appear to have diabetes based on a medication-based phenotype", "top_k":20, "max_results":3, "candidate_limit":10}' | python -m json.tool >> ${OUTPUT_FILE}
+
+# Close the JSON document; the matching opening brackets are presumably written earlier in this script.
+echo ']}' >> ${OUTPUT_FILE}
+
+### Post-run summaries. Each embedded Python script re-reads the results file; $OUTPUT_FILE is expanded by this script because the -lc command is double-quoted.
+
+echo "INFO: Tests completed. File written."
+echo "RESULTS SUMMARY:"
+# Summary 1: per case, the planning shortlist, the recommended ids, and the shortlist-enforcement diagnostics.
+/bin/sh -lc "python - <<'PY'
+import json
+from pathlib import Path
+p = Path(\"$OUTPUT_FILE\")
+obj = json.loads(p.read_text())
+results = obj['results'] if isinstance(obj, dict) and 'results' in obj else obj
+print('count', len(results))
+for i, item in enumerate(results, 1):
+    query = item.get('search', {}).get('query') or item.get('study_intent') or ''
+    recs = item.get('recommendations', {}).get('phenotype_recommendations') or []
+    rec_ids = [r.get('phenotype_id') for r in recs]
+    shortlist = item.get('planning', {}).get('shortlist_ids') or []
+    se = item.get('diagnostics', {}).get('planning_rerank', {}).get('shortlist_enforcement', {})
+    print(f'CASE {i}: {query}')
+    print(' shortlist:', shortlist)
+    print(' rec_ids:', rec_ids)
+    print(' replaced_ids:', se.get('replaced_ids'))
+    print(' blocked_pool_ids:', se.get('blocked_pool_ids'))
+    print(' blocked_candidate_reasons:', se.get('blocked_candidate_reasons'))
+    print(' duplicate_topic_ids:', se.get('duplicate_topic_ids'))
+    print(' dedupe_backfilled_ids:', se.get('dedupe_backfilled_ids'))
+    print(' dedupe_applied:', se.get('dedupe_applied'))
+    print(' enforced_shortlist_ids:', se.get('enforced_shortlist_ids'))
+    print(' final_deterministic:', item.get('diagnostics', {}).get('final_deterministic'))
+    print()
+PY"
+# Summary 2: per case, the intent facets, planning shortlist and notes, and each recommendation with its justification.
+/bin/sh -lc "python - <<'PY'
+import json
+from pathlib import Path
+p = Path(\"$OUTPUT_FILE\")
+obj = json.loads(p.read_text())
+results = obj['results'] if isinstance(obj, dict) and 'results' in obj else obj
+for i, item in enumerate(results, 1):
+    q = item.get('search', {}).get('query') or item.get('study_intent') or ''
+    pr = item.get('diagnostics', {}).get('planning_rerank', {})
+    print(f'CASE {i}: {q}')
+    print(' intent_facets_raw:', pr.get('intent_facets_raw'))
+    print(' intent_facets_effective:', pr.get('intent_facets_effective'))
+    print(' planning_shortlist:', item.get('planning', {}).get('shortlist_ids'))
+    print(' planning_reasoning:', item.get('planning', {}).get('reasoning_notes'))
+    print(' recommendations:')
+    for rec in item.get('recommendations', {}).get('phenotype_recommendations', []):
+        print(' ', rec.get('phenotype_id'), '|', rec.get('phenotype_name'), '|', rec.get('justification'))
+    print()
+PY"
+# Summary 3: per case, the first 8 rerank candidates with their metadata score and topic/alias match reasons.
+/bin/sh -lc "python - <<'PY'
+import json
+from pathlib import Path
+p = Path(\"$OUTPUT_FILE\")
+obj = json.loads(p.read_text())
+results = obj['results'] if isinstance(obj, dict) and 'results' in obj else obj
+for i, item in enumerate(results, 1):
+    q = item.get('search', {}).get('query') or item.get('study_intent') or ''
+    print(f'CASE {i}: {q}')
+    cand = item.get('diagnostics', {}).get('planning_rerank', {}).get('candidates', [])
+    for row in cand[:8]:
+        reasons = []
+        for r in row.get('reasons', []):
+            kind = r.get('kind')
+            if kind in (
+                'topic_mismatch',
+                'topic_primary',
+                'topic_context',
+                'context_without_primary',
+                'dynamic_clinical_alias_match',
+                'dynamic_clinical_alias_context',
+            ):
+                reasons.append((kind, r.get('detail')))
+        print(' ', row.get('phenotype_id'), '|', row.get('metadata_score'), '|', reasons)
+    print()
+PY"
diff
--git a/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.ps1 b/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.ps1
new file mode 100644
index 0000000..bf339ea
--- /dev/null
+++ b/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.ps1
@@ -0,0 +1,173 @@
+# Benchmark the phenotype_recommendation flow: POST each study intent to the
+# endpoint below, time the request, and write the raw responses (JSON) plus a
+# per-case timing summary (CSV) to the current directory.
+$OUTPUT_FILE = "phenotype_results.json"
+$SUMMARY_FILE = "phenotype_summary.csv"
+
+$endpoint = "http://127.0.0.1:8765/flows/phenotype_recommendation"
+
+# Benchmark cases: Name is the display label, Intent is sent as study_intent.
+$tests = @(
+    @{
+        Name = "Cardiac defibrillator in situ (MAP)"
+        Intent = "Patients with an implanted cardiac defibrillator"
+    },
+    @{
+        Name = "Fasciitis (gwPheWAS)"
+        Intent = "Patients diagnosed with fasciitis"
+    },
+    @{
+        Name = "Acute prostatitis (MAP)"
+        Intent = "Patients with acute prostatitis"
+    },
+    @{
+        Name = "Esophagectomy"
+        Intent = "Patients who underwent esophagectomy"
+    },
+    @{
+        Name = "Peripheral neuritis"
+        Intent = "Patients diagnosed with peripheral neuritis"
+    },
+    @{
+        Name = "TNF-alpha + IL12/23 overlap"
+        Intent = "Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days"
+    },
+    @{
+        Name = "Allergic rhinitis"
+        Intent = "Patients with allergic rhinitis"
+    },
+    @{
+        Name = "Ischemic Heart Disease (Sandhu)"
+        Intent = "Patients with ischemic heart disease"
+    },
+    @{
+        Name = "Hemorrhage in Early Pregnancy"
+        Intent = "Pregnant patients with hemorrhage in early pregnancy or threatened labor"
+    },
+    @{
+        Name = "Lung Resection"
+        Intent = "Patients who underwent lung resection"
+    }
+)
+
+# Clear old files
+if (Test-Path $OUTPUT_FILE) {
+    Remove-Item $OUTPUT_FILE
+}
+
+if (Test-Path $SUMMARY_FILE) {
+    Remove-Item $SUMMARY_FILE
+}
+
+$allResults = @()
+$summaryRows = @()
+
+foreach ($test in $tests) {
+
+    Write-Host ""
+    Write-Host "================================================="
+    Write-Host "INFO: $($test.Name)"
+    Write-Host "================================================="
+
+    $body = @{
+        study_intent = $test.Intent
+        top_k = 20
+        max_results = 3
+        candidate_limit = 10
+    } | ConvertTo-Json
+
+    $sw = [System.Diagnostics.Stopwatch]::StartNew()
+
+    try {
+
+        $response = Invoke-RestMethod `
+            -Uri $endpoint `
+            -Method POST `
+            -ContentType "application/json" `
+            -Body $body
+
+        $sw.Stop()
+
+        $elapsed = [math]::Round($sw.Elapsed.TotalSeconds, 3)
+
+        Write-Host "SUCCESS"
+        Write-Host "Elapsed Seconds: $elapsed"
+
+        $resultObject = @{
+            benchmark_name = $test.Name
+            elapsed_seconds = $elapsed
+            response = $response
+        }
+
+        $allResults += $resultObject
+
+        $summaryRows += [PSCustomObject]@{
+            benchmark_name = $test.Name
+            elapsed_seconds = $elapsed
+        }
+
+    }
+    catch {
+
+        $sw.Stop()
+
+        Write-Host "FAILED"
+        Write-Host $_.Exception.Message
+
+        # -1 marks a failed request; it is filtered out of the statistics below.
+        $summaryRows += [PSCustomObject]@{
+            benchmark_name = $test.Name
+            elapsed_seconds = -1
+        }
+    }
+}
+
+# Write detailed JSON. Passing the array via -InputObject keeps the output a
+# JSON array even when zero or one request succeeded (piping would unwrap it).
+ConvertTo-Json -InputObject @($allResults) -Depth 20 | Out-File $OUTPUT_FILE
+
+# Write CSV summary
+$summaryRows | Export-Csv $SUMMARY_FILE -NoTypeInformation
+
+Write-Host ""
+Write-Host "======================================="
+Write-Host "BENCHMARK COMPLETE"
+Write-Host "======================================="
+Write-Host "Detailed Results: $OUTPUT_FILE"
+Write-Host "Timing Summary : $SUMMARY_FILE"
+
+# Sorted timings of successful requests only. @(...) forces an array so that
+# .Count and indexing also work when zero or one request succeeded.
+$times = @(
+    $summaryRows |
+        Where-Object {$_.elapsed_seconds -gt 0} |
+        Select-Object -ExpandProperty elapsed_seconds |
+        Sort-Object
+)
+
+Write-Host ""
+Write-Host "Average Timing:"
+if ($times.Count -gt 0) {
+    $avg = ($times | Measure-Object -Average).Average
+    Write-Host ([math]::Round($avg,3))
+}
+else {
+    Write-Host "NA"
+}
+
+Write-Host ""
+Write-Host "Median Timing:"
+if ($times.Count -eq 0) {
+    Write-Host "NA"
+}
+else {
+    $mid = [int][math]::Floor($times.Count / 2)
+    if ($times.Count % 2 -eq 1) {
+        $median = $times[$mid]
+    }
+    else {
+        # Even count: average the two middle values.
+        $median = ($times[$mid - 1] + $times[$mid]) / 2
+    }
+    Write-Host ([math]::Round($median,3))
+}
\ No newline at end of file
diff --git a/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.sh b/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.sh
new file mode 100755
index 0000000..0598f85
--- /dev/null
+++ b/docs/evaluation/phenotype_recommendations/phenotype_rec_benchmark.sh
@@ -0,0
+1,168 @@
+#!/usr/bin/env bash
+#
+# Benchmark the phenotype_recommendation flow: POST each study intent to the
+# endpoint below, record how long each request takes, and write the raw
+# responses (JSON array) plus a per-case timing summary (CSV).
+# Requires: curl, jq, awk, sort.
+
+OUTPUT_FILE="phenotype_results.json"
+SUMMARY_FILE="phenotype_summary.csv"
+ENDPOINT="http://127.0.0.1:8765/flows/phenotype_recommendation"
+NL=$'\n'
+
+rm -f "$OUTPUT_FILE" "$SUMMARY_FILE"
+
+echo "benchmark_name,elapsed_seconds" > "$SUMMARY_FILE"
+echo "[" > "$OUTPUT_FILE"
+
+# NAMES[i] is the display label for the study intent in INTENTS[i].
+declare -a NAMES=(
+    "Cardiac defibrillator in situ (MAP)"
+    "Fasciitis (gwPheWAS)"
+    "Acute prostatitis (MAP)"
+    "Esophagectomy"
+    "Peripheral neuritis"
+    "TNF-alpha + IL12/23 overlap"
+    "Allergic rhinitis"
+    "Ischemic Heart Disease (Sandhu)"
+    "Hemorrhage in Early Pregnancy"
+    "Lung Resection"
+)
+
+declare -a INTENTS=(
+    "Patients with an implanted cardiac defibrillator"
+    "Patients diagnosed with fasciitis"
+    "Patients with acute prostatitis"
+    "Patients who underwent esophagectomy"
+    "Patients diagnosed with peripheral neuritis"
+    "Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days"
+    "Patients with allergic rhinitis"
+    "Patients with ischemic heart disease"
+    "Pregnant patients with hemorrhage in early pregnancy or threatened labor"
+    "Patients who underwent lung resection"
+)
+
+first_result=true
+
+for i in "${!NAMES[@]}"; do
+    name="${NAMES[$i]}"
+    intent="${INTENTS[$i]}"
+
+    echo ""
+    echo "================================================="
+    echo "INFO: $name"
+    echo "================================================="
+
+    body=$(jq -n \
+        --arg study_intent "$intent" \
+        '{
+            study_intent: $study_intent,
+            top_k: 20,
+            max_results: 3,
+            candidate_limit: 10
+        }')
+
+    # Let curl report the request time itself: `date +%s%N` is a GNU extension
+    # and silently yields bogus timings on BSD/macOS. The time is appended as a
+    # final line after the response body. --fail turns HTTP error statuses into
+    # a non-zero exit, matching the PowerShell benchmark, where
+    # Invoke-RestMethod throws on them.
+    raw=$(curl -s --fail -X POST "$ENDPOINT" \
+        -H "Content-Type: application/json" \
+        -d "$body" \
+        -w '\n%{time_total}')
+
+    curl_status=$?
+
+    # Split the trailing timing line from the response body.
+    time_total="${raw##*$NL}"
+    response="${raw%$NL*}"
+    elapsed_seconds=$(awk -v t="$time_total" 'BEGIN {printf "%.3f", t}')
+
+    # Build the entry before touching the output files so that a response that
+    # is not valid JSON is counted as a failure instead of leaving a dangling
+    # comma in the results array.
+    entry=""
+    if [[ $curl_status -eq 0 && -n "$response" ]]; then
+        entry=$(jq -n \
+            --arg benchmark_name "$name" \
+            --argjson elapsed_seconds "$elapsed_seconds" \
+            --argjson response "$response" \
+            '{
+                benchmark_name: $benchmark_name,
+                elapsed_seconds: $elapsed_seconds,
+                response: $response
+            }') || entry=""
+    fi
+
+    if [[ -n "$entry" ]]; then
+        echo "SUCCESS"
+        echo "Elapsed Seconds: $elapsed_seconds"
+
+        echo "\"$name\",$elapsed_seconds" >> "$SUMMARY_FILE"
+
+        if [[ "$first_result" == false ]]; then
+            echo "," >> "$OUTPUT_FILE"
+        fi
+
+        printf '%s\n' "$entry" >> "$OUTPUT_FILE"
+
+        first_result=false
+
+    else
+        echo "FAILED"
+        echo "curl exit status: $curl_status"
+
+        echo "\"$name\",-1" >> "$SUMMARY_FILE"
+    fi
+done
+
+echo "" >> "$OUTPUT_FILE"
+echo "]" >> "$OUTPUT_FILE"
+
+echo ""
+echo "======================================="
+echo "BENCHMARK COMPLETE"
+echo "======================================="
+echo "Detailed Results: $OUTPUT_FILE"
+echo "Timing Summary : $SUMMARY_FILE"
+
+echo ""
+echo "Average Timing:"
+awk -F, '
+NR > 1 && $2 > 0 {
+    sum += $2
+    count += 1
+}
+END {
+    if (count > 0) {
+        printf "%.3f\n", sum / count
+    } else {
+        print "NA"
+    }
+}
+' "$SUMMARY_FILE"
+
+echo ""
+echo "Median Timing:"
+# Sort with sort(1) rather than awk asort(), which is a gawk-only extension
+# and is unavailable in mawk and BSD/macOS awk.
+awk -F, 'NR > 1 && $2 > 0 { print $2 }' "$SUMMARY_FILE" | sort -n | awk '
+{
+    values[NR] = $1
+}
+END {
+    if (NR == 0) {
+        print "NA"
+        exit
+    }
+
+    mid = int(NR / 2)
+
+    if (NR % 2 == 1) {
+        printf "%.3f\n", values[mid + 1]
+    } else {
+        printf "%.3f\n", (values[mid] + values[mid + 1]) / 2
+    }
+}
+'
diff --git a/docs/evaluation/phenotype_recommendations/testing-results.txt b/docs/evaluation/phenotype_recommendations/testing-results.txt
new file mode 100644
index 0000000..9918055
--- /dev/null
+++ b/docs/evaluation/phenotype_recommendations/testing-results.txt
@@ -0,0 +1,1773 @@
+INFO: creating output file /tmp/phenotype_recommendation_tests.json
+INFO: Cardiac defibrillator in situ (MAP)
+INFO: Fasciitis (gwPheWAS)
+INFO:
Acute prostatitis (MAP) +INFO: [P] Esophagectomy +INFO: [P][R] Peripheral neuritis +INFO: [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap +INFO: [P][R] Allergic rhinitis +INFO: Ischemic Heart Disease (Sandhu) +INFO: Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) +INFO: [P] Lung Resection +INFO: [P] Laryngitis +INFO: Regional Enteritis (Phecode) +INFO: Renal Sclerosis NOS (VADC) +INFO: Other cardiomyopathy (MAP) +INFO: [P] Posterior reversible encephalopathy syndrome PRES +INFO: [P] Anorexia Nervosa +INFO: [P] Dizziness or giddiness including motion sickness and vertigo +INFO: Polymyalgia Rheumatica (VADC) +INFO: Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) +INFO: [P][R] Low blood pressure +INFO: [P] Encephalopathy +INFO: [P] Birdshot chorioretinitis +INFO: Macular Degeneration (Senile) of Retina Nos (Phecode) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: [P] Primary adenocarcinoma of rectum MSI-L +INFO: Blister (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Neurofibromatosis type 1 (FP) +INFO: Keloid scar (gwPheWAS) +INFO: [P] acetaminophen exposure 10 +INFO: [P] Antibiotics Rifamycins 10 +INFO: Joint/ligament sprain (gwPheWAS) +INFO: Miscarriage; stillbirth (MAP) +INFO: Arterial embolism and thrombosis of lower extremity artery (MAP) +INFO: [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection +INFO: [P] Hospitalization with preinfarction syndrome +INFO: Personal history of diseases of blood and blood-forming organs (MAP) +INFO: Other Benign Pancreatic Conditions (Nguyen) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: [P] New users of dihydropyridine calcium channel blockers +INFO: Renal Sclerosis NOS (VADC) +INFO: Polymyalgia Rheumatica (VADC) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: Cardiac Complications Not Elsewhere Classified (VADC) +INFO: Fasciitis (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Barrett's 
esophagus (gwPheWAS) +INFO: Regional Enteritis (Phecode) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: Aortic Valve Disease (Phecode) +INFO: Chronic Periodontitis (Phecode) +INFO: Hypertensive chronic kidney disease (MAP) +INFO: Other cardiomyopathy (MAP) +INFO: Scleritis and episcleritis (MAP) +INFO: Tests completed. File written. +RESULTS SUMMARY: +count 54 +CASE 1: Patients with an implanted cardiac defibrillator + shortlist: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + rec_ids: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + final_deterministic: {'selected_ids': ['cipher:15146', 'cipher:2288', 'cipher:13288'], 'matched_llm_ids': ['cipher:15146', 'cipher:2288', 'cipher:13288'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 2: Patients diagnosed with fasciitis + shortlist: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + rec_ids: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + final_deterministic: {'selected_ids': ['cipher:15684', 'cipher:2703', 'cipher:14029'], 'matched_llm_ids': ['cipher:15684', 'cipher:2703', 'cipher:14029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 3: Patients with acute prostatitis + shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + rec_ids: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} 
+ duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + final_deterministic: {'selected_ids': ['ohdsi:283', 'cipher:13720', 'cipher:3500'], 'matched_llm_ids': ['ohdsi:283', 'cipher:13720', 'cipher:3500'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 4: Patients who underwent esophagectomy + shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + rec_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + final_deterministic: {'selected_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'], 'matched_llm_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 5: Patients diagnosed with peripheral neuritis + shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + rec_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: ['ohdsi:540'] + dedupe_backfilled_ids: [] + dedupe_applied: True + enforced_shortlist_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + final_deterministic: {'selected_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'matched_llm_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + rec_ids: 
['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + final_deterministic: {'selected_ids': ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'], 'matched_llm_ids': ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 7: Patients with allergic rhinitis + shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + rec_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + final_deterministic: {'selected_ids': ['ohdsi:508', 'ohdsi:367', 'cipher:2081'], 'matched_llm_ids': ['ohdsi:508', 'ohdsi:367'], 'defaulted_ids': ['cipher:2081'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 8: Patients with ischemic heart disease + shortlist: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + rec_ids: ['ohdsi:654', 'cipher:16261', 'cipher:29560'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + final_deterministic: {'selected_ids': ['ohdsi:654', 'cipher:16261', 'cipher:29560'], 'matched_llm_ids': ['ohdsi:654', 'cipher:16261', 'cipher:29560'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 9: Pregnant patients with 
hemorrhage in early pregnancy or threatened labor + shortlist: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + rec_ids: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + final_deterministic: {'selected_ids': ['cipher:2643', 'cipher:17376', 'cipher:2798'], 'matched_llm_ids': ['cipher:2643', 'cipher:17376'], 'defaulted_ids': ['cipher:2798'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 10: Patients who underwent lung resection + shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + rec_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + final_deterministic: {'selected_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'matched_llm_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 11: Patients with laryngitis + shortlist: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + rec_ids: ['ohdsi:355', 'cipher:15233', 'cipher:2046'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + final_deterministic: {'selected_ids': ['ohdsi:355', 'cipher:15233', 'cipher:2046'], 'matched_llm_ids': ['ohdsi:355', 'cipher:15233', 'cipher:2046'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 
'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 12: Patients with regional enteritis + shortlist: ['cipher:13571', 'cipher:3534'] + rec_ids: ['cipher:13571', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13571', 'cipher:3534'] + final_deterministic: {'selected_ids': ['cipher:13571', 'cipher:3534'], 'matched_llm_ids': ['cipher:13571', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 13: Patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + rec_ids: ['cipher:13646', 'cipher:13656', 'cipher:3541'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'cipher:3541'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'cipher:3541'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 14: Patients with cardiomyopathy + shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + rec_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + final_deterministic: {'selected_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'matched_llm_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'defaulted_ids': [], 
'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 15: Patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 16: Patients with anorexia nervosa + shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + rec_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + final_deterministic: {'selected_ids': ['ohdsi:1340', 'cipher:17187', 'cipher:2117'], 'matched_llm_ids': ['ohdsi:1340'], 'defaulted_ids': ['cipher:17187', 'cipher:2117'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 17: Patients with dizziness, vertigo, or motion sickness + shortlist: ['ohdsi:893', 'ohdsi:244', 'cipher:3402'] + rec_ids: ['ohdsi:893', 'ohdsi:244', 'cipher:3402'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:893', 'ohdsi:244', 'cipher:3402'] + final_deterministic: {'selected_ids': ['ohdsi:893', 'ohdsi:244', 'cipher:3402'], 'matched_llm_ids': ['ohdsi:893', 'ohdsi:244'], 'defaulted_ids': ['cipher:3402'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} 
+ +CASE 18: Patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + rec_ids: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:13992', 'cipher:3460'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992'], 'defaulted_ids': ['cipher:3460'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + shortlist: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + rec_ids: ['cipher:2064', 'cipher:2123', 'cipher:3573'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + final_deterministic: {'selected_ids': ['cipher:2064', 'cipher:2123', 'cipher:3573'], 'matched_llm_ids': ['cipher:2064', 'cipher:3573'], 'defaulted_ids': ['cipher:2123'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 20: Patients with low blood pressure + shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + rec_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + replaced_ids: [] + blocked_pool_ids: ['ohdsi:997'] + blocked_candidate_reasons: {'ohdsi:997': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + final_deterministic: {'selected_ids': ['ohdsi:339', 'ohdsi:890', 'cipher:13390'], 'matched_llm_ids': ['ohdsi:339', 'ohdsi:890'], 'defaulted_ids': 
['cipher:13390'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 21: Patients with encephalopathy + shortlist: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + rec_ids: ['ohdsi:194', 'cipher:2664', 'ohdsi:331'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:194', 'cipher:2664', 'ohdsi:331'], 'matched_llm_ids': ['ohdsi:194', 'ohdsi:331'], 'defaulted_ids': ['cipher:2664'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 22: Patients with birdshot chorioretinitis + shortlist: ['ohdsi:1223'] + rec_ids: ['ohdsi:1223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1223'] + final_deterministic: {'selected_ids': ['ohdsi:1223'], 'matched_llm_ids': ['ohdsi:1223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 23: Older adults with macular degeneration + shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + rec_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + final_deterministic: {'selected_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'matched_llm_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': 
[], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 24: Patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 25: Patients with MSI-low rectal adenocarcinoma + shortlist: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'] + rec_ids: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'] + replaced_ids: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'] + blocked_pool_ids: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819', 'ohdsi:831', 'ohdsi:843'] + blocked_candidate_reasons: {'ohdsi:836': 'procedure_for_diagnosis_intent', 'ohdsi:823': 'procedure_for_diagnosis_intent', 'ohdsi:819': 'procedure_for_diagnosis_intent', 'ohdsi:831': 'procedure_for_diagnosis_intent', 'ohdsi:843': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'] + final_deterministic: {'selected_ids': ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'], 'matched_llm_ids': ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 26: Patients with blistering skin lesions + shortlist: ['ohdsi:652', 'ohdsi:376'] + rec_ids: ['ohdsi:652', 'ohdsi:376'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:652', 'ohdsi:376'] + final_deterministic: {'selected_ids': ['ohdsi:652', 'ohdsi:376'], 'matched_llm_ids': ['ohdsi:652', 'ohdsi:376'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 27: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:13516', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:13516'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:13516', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:13516'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333', 'cipher:13516'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 28: Patients with neurofibromatosis type 1 + shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + rec_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + final_deterministic: {'selected_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'matched_llm_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 29: Patients with keloid scars + shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + rec_ids: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + replaced_ids: [] + blocked_pool_ids: [] + 
blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + final_deterministic: {'selected_ids': ['cipher:13930', 'cipher:15610', 'cipher:2950'], 'matched_llm_ids': ['cipher:13930', 'cipher:15610', 'cipher:2950'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 30: Patients with acetaminophen exposure + shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + rec_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + final_deterministic: {'selected_ids': ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': ['ohdsi:1427', 'ohdsi:1428'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 31: Patients exposed to rifamycin antibiotics + shortlist: ['ohdsi:1211', 'ohdsi:1206'] + rec_ids: ['ohdsi:1211', 'ohdsi:1206'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1211', 'ohdsi:1206'] + final_deterministic: {'selected_ids': ['ohdsi:1211', 'ohdsi:1206'], 'matched_llm_ids': ['ohdsi:1211'], 'defaulted_ids': ['ohdsi:1206'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 32: Patients with a joint or ligament sprain + shortlist: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + rec_ids: ['cipher:14236', 'cipher:3569', 'cipher:2944'] + replaced_ids: [] + 
blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + final_deterministic: {'selected_ids': ['cipher:14236', 'cipher:3569', 'cipher:2944'], 'matched_llm_ids': ['cipher:14236'], 'defaulted_ids': ['cipher:3569', 'cipher:2944'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 33: Pregnant patients with miscarriage or stillbirth + shortlist: ['ohdsi:627', 'cipher:3056', 'cipher:15565', 'cipher:13818'] + rec_ids: ['ohdsi:627', 'cipher:3056', 'cipher:15565'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:627', 'cipher:3056', 'cipher:15565', 'cipher:13818'] + final_deterministic: {'selected_ids': ['ohdsi:627', 'cipher:3056', 'cipher:15565'], 'matched_llm_ids': ['ohdsi:627'], 'defaulted_ids': ['cipher:3056', 'cipher:15565'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + shortlist: ['cipher:13354', 'cipher:2142'] + rec_ids: ['cipher:13354', 'cipher:2142'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13354', 'cipher:2142'] + final_deterministic: {'selected_ids': ['cipher:13354', 'cipher:2142'], 'matched_llm_ids': ['cipher:13354', 'cipher:2142'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + shortlist: 
['ohdsi:1301', 'ohdsi:1186', 'ohdsi:410', 'cipher:31223'] + rec_ids: ['ohdsi:1301', 'ohdsi:1186', 'ohdsi:410'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1301', 'ohdsi:1186', 'ohdsi:410', 'cipher:31223'] + final_deterministic: {'selected_ids': ['ohdsi:1301', 'ohdsi:1186', 'ohdsi:410'], 'matched_llm_ids': ['ohdsi:1301', 'ohdsi:410'], 'defaulted_ids': ['ohdsi:1186'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 36: Patients hospitalized with preinfarction syndrome + shortlist: ['ohdsi:1081'] + rec_ids: ['ohdsi:1081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1081'] + final_deterministic: {'selected_ids': ['ohdsi:1081'], 'matched_llm_ids': ['ohdsi:1081'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + shortlist: ['cipher:17130', 'cipher:3412'] + rec_ids: ['cipher:17130', 'cipher:3412'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17130', 'cipher:3412'] + final_deterministic: {'selected_ids': ['cipher:17130', 'cipher:3412'], 'matched_llm_ids': ['cipher:17130', 'cipher:3412'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 38: Patients with benign pancreatic conditions + shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + rec_ids: ['cipher:16954', 
'cipher:16952', 'cipher:16953'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + final_deterministic: {'selected_ids': ['cipher:16954', 'cipher:16952', 'cipher:16953'], 'matched_llm_ids': ['cipher:16952', 'cipher:16953'], 'defaulted_ids': ['cipher:16954'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 39: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4029', 'cipher:3190', 'cipher:4399'] + rec_ids: ['cipher:3192', 'cipher:4029', 'cipher:3190'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4029', 'cipher:3190', 'cipher:4399'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4029', 'cipher:3190'], 'matched_llm_ids': ['cipher:3192', 'cipher:4029', 'cipher:3190'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 40: New users of dihydropyridine calcium channel blockers + shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + rec_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + final_deterministic: {'selected_ids': ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'], 'matched_llm_ids': ['ohdsi:1047', 'ohdsi:1048'], 'defaulted_ids': ['ohdsi:1052'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 
'used_default_justification_count': 1} + +CASE 41: Veteran patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + rec_ids: ['cipher:13646', 'cipher:13656', 'cipher:17322'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'cipher:17322'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'cipher:17322'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 42: Veteran patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453', 'cipher:3460'] + rec_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453', 'cipher:3460'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 43: Veteran patients with autoimmune hemolytic anemia + shortlist: ['cipher:12888', 'ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['cipher:12888', 'ohdsi:1018', 'ohdsi:738'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12888', 'ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['cipher:12888', 
'ohdsi:1018', 'ohdsi:738'], 'matched_llm_ids': ['cipher:12888', 'ohdsi:1018', 'ohdsi:738'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 44: Veteran patients with cardiac complications + shortlist: ['cipher:15819', 'cipher:17258'] + rec_ids: ['cipher:15819', 'cipher:17258'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15819', 'cipher:17258'] + final_deterministic: {'selected_ids': ['cipher:15819', 'cipher:17258'], 'matched_llm_ids': ['cipher:15819', 'cipher:17258'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 45: Patients diagnosed with fasciitis + shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + rec_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + final_deterministic: {'selected_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'matched_llm_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 46: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': 
['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 47: Patients with Barretts esophagus + shortlist: ['cipher:13531', 'cipher:2187', 'cipher:15342', 'cipher:30228'] + rec_ids: ['cipher:13531', 'cipher:2187', 'cipher:15342'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13531', 'cipher:2187', 'cipher:15342', 'cipher:30228'] + final_deterministic: {'selected_ids': ['cipher:13531', 'cipher:2187', 'cipher:15342'], 'matched_llm_ids': ['cipher:13531', 'cipher:2187', 'cipher:15342'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 48: Patients with regional enteritis + shortlist: ['cipher:13571', 'cipher:3534'] + rec_ids: ['cipher:13571', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13571', 'cipher:3534'] + final_deterministic: {'selected_ids': ['cipher:13571', 'cipher:3534'], 'matched_llm_ids': ['cipher:13571', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 49: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 
'cipher:4029', 'cipher:3190'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 50: Patients with aortic valve disease + shortlist: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + rec_ids: ['cipher:13226', 'cipher:13231', 'cipher:30301'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + final_deterministic: {'selected_ids': ['cipher:13226', 'cipher:13231', 'cipher:30301'], 'matched_llm_ids': ['cipher:13226', 'cipher:13231', 'cipher:30301'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 51: Patients with chronic periodontitis + shortlist: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + rec_ids: ['cipher:13494', 'cipher:3397', 'cipher:15317'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + final_deterministic: {'selected_ids': ['cipher:13494', 'cipher:3397', 'cipher:15317'], 'matched_llm_ids': ['cipher:13494', 'cipher:15317'], 'defaulted_ids': ['cipher:3397'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 52: Patients with hypertensive chronic kidney disease + shortlist: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'cipher:13240'] + rec_ids: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'] + replaced_ids: [] + 
blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'cipher:13240'] + final_deterministic: {'selected_ids': ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'], 'matched_llm_ids': ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 53: Patients with cardiomyopathy + shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30174', 'cipher:30192'] + rec_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30174'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30174', 'cipher:30192'] + final_deterministic: {'selected_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30174'], 'matched_llm_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30174'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 54: Patients with scleritis or episcleritis + shortlist: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + rec_ids: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + final_deterministic: {'selected_ids': ['cipher:30069', 'cipher:3581', 'cipher:13186'], 'matched_llm_ids': ['cipher:30069', 'cipher:13186'], 'defaulted_ids': ['cipher:3581'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 1: Patients with an implanted cardiac defibrillator + intent_facets_raw: 
{'condition_or_topic': 'Implanted cardiac defibrillator', 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'Cardiac device users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Implanted cardiac defibrillator is a device'], 'care_setting_cues': ['Inpatient care is typical for device management'], 'population_cues': ['Patients with cardiac devices']} + intent_facets_effective: {'condition_or_topic': 'Implanted cardiac defibrillator', 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'Cardiac device users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Implanted cardiac defibrillator is a device'], 'care_setting_cues': ['Inpatient care is typical for device management'], 'population_cues': ['Patients with cardiac devices'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + planning_reasoning: ['Selected shortlisted candidates align with Implanted cardiac defibrillator as a procedure-oriented study intent.', 'Included Cardiac defibrillator in situ (gwPheWAS) as a medication based candidate focused on Cardiac Defibrillator.', 'Included Cardiac defibrillator in situ (MAP) as a outcome candidate focused on Cardiac defibrillator in situ.', 'Included Cardiac Defibrillator in Situ (Phecode) as a comorbidity covariate candidate focused on Cardiac Defibrillator in Situ.'] + recommendations: + cipher:15146 | Cardiac defibrillator in situ (gwPheWAS) | This phenotype directly addresses the study intent of patients with implanted cardiac defibrillators, utilizing the Million Veteran Program data. 
+ cipher:2288 | Cardiac defibrillator in situ (MAP) | This phenotype identifies patients based on a machine learning model (MAP) predicting the presence of a cardiac defibrillator, aligning with the study's focus. + cipher:13288 | Cardiac Defibrillator in Situ (Phecode) | This phenotype, derived from the Phecode mapping, represents a clinically relevant definition of a cardiac defibrillator, suitable for analysis given the study's focus. + +CASE 2: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS. 
It directly aligns with the study intent of patients diag + cipher:2703 | Fasciitis (MAP) | This phenotype identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold. While the MAP algorithm is unsupervised, it provides a probability-based approach rel + cipher:14029 | Fasciitis (Phecode) | This phenotype represents a diagnosis of Fasciitis based on ICD-9 and ICD-10 codes. It is defined by the Phecode mapping system, providing a standard clinical definition. + +CASE 3: Patients with acute prostatitis + intent_facets_raw: {'condition_or_topic': 'Prostatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'Acute', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of prostatitis'], 'care_setting_cues': ['Inpatient care', 'Emergency Department', 'Any care setting'], 'population_cues': ['Acute prostatitis']} + intent_facets_effective: {'condition_or_topic': 'Prostatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'Acute', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of prostatitis'], 'care_setting_cues': ['Inpatient care', 'Emergency Department', 'Any care setting'], 'population_cues': ['Acute prostatitis'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + planning_reasoning: ['Selected shortlisted candidates align with Prostatitis as a diagnosis-oriented study intent.', 'Included [P] Prostatitis as a diagnosis candidate focused on Prostatitis.', 'Included Acute Prostatitis (Phecode) as a diagnosis candidate focused on Acute Prostatitis.', 'Included Prostatitis (MAP) as a diagnosis candidate focused on Prostatitis.'] + recommendations: + ohdsi:283 | [P] Prostatitis | This phenotype directly 
reflects the study intent of patients with acute prostatitis, specifically capturing both chronic and acute prostatitis. + cipher:13720 | Acute Prostatitis (Phecode) | This phenotype aligns with the study intent of 'acute prostatitis' based on ICD-9 and ICD-10 codes. Provides a defined diagnosis based on existing coding systems. + cipher:3500 | Prostatitis (MAP) | This phenotype represents a diagnosis for acute prostatitis based on a MAP algorithm, offering a probabilistic approach and a defined threshold for classification. Useful for exploring diverse patient + +CASE 4: Patients who underwent esophagectomy + intent_facets_raw: {'condition_or_topic': 'Esophagectomy', 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'surgical patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Surgical procedure'], 'care_setting_cues': ['Hospital inpatient care'], 'population_cues': ['Patients undergoing surgery']} + intent_facets_effective: {'condition_or_topic': 'Esophagectomy', 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'surgical patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Surgical procedure'], 'care_setting_cues': ['Hospital inpatient care'], 'population_cues': ['Patients undergoing surgery'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + planning_reasoning: ['Selected shortlisted candidates align with Esophagectomy as a procedure-oriented study intent.', 'Included [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults, post op new Afib as a procedure candidate 
focused on Esophagectomy.'] + recommendations: + ohdsi:1097 | [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib | This phenotype directly aligns with the study intent of patients who underwent esophagectomy and developing postoperative atrial fibrillation. + ohdsi:1294 | [P] Esophagectomy, adults | This phenotype represents surgical procedures involving the creation of an esophageal bypass, which is relevant to the study intent. + ohdsi:1309 | [P] Esophagectomy, adults, post op new Afib | This phenotype represents the procedure of esophagectomy, primarily used for characterizing patients undergoing surgical intervention and postoperative atrial fibrillation. + +CASE 5: Patients diagnosed with peripheral neuritis + intent_facets_raw: {'condition_or_topic': 'peripheral neuritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'peripheral neuritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + planning_reasoning: ['Selected shortlisted candidates align with peripheral neuritis as a diagnosis-oriented study intent.', 'Included [P][R] Peripheral neuritis as a diagnosis candidate focused on Peripheral neuritis.', 'Included [P] Peripheral Neuropathy or Neuritits as a diagnosis candidate focused on Peripheral Neuropathy.', 'Included [P][R] Optic neuritis as a diagnosis candidate focused on Optic neuritis.', 
'Near-duplicate topical variants were removed to preserve distinct recommendation coverage: ohdsi:540.'] + recommendations: + ohdsi:388 | [P][R] Peripheral neuritis | This phenotype directly represents the diagnosis of peripheral neuritis, aligning with the study intent. + ohdsi:389 | [P] Peripheral Neuropathy or Neuritits | This phenotype identifies the first occurrence of peripheral neuropathy or neuritis, a relevant condition given the study intent. + ohdsi:238 | [P][R] Optic neuritis | This phenotype represents optic neuritis, which can be related to peripheral nerve involvement and aligns with the study intent. + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + intent_facets_raw: {'condition_or_topic': 'TNF-alpha inhibitors and IL-12/23 inhibitors', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['TNF-alpha inhibitors', 'IL-12/23 inhibitors'], 'care_setting_cues': ['any'], 'population_cues': ['concomitant exposure']} + intent_facets_effective: {'condition_or_topic': 'TNF-alpha inhibitors and IL-12/23 inhibitors', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['TNF-alpha inhibitors', 'IL-12/23 inhibitors'], 'care_setting_cues': ['any'], 'population_cues': ['concomitant exposure'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + planning_reasoning: ['Selected shortlisted candidates align with TNF-alpha inhibitors and IL-12/23 inhibitors as a medication based-oriented study intent.', 'Included [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 
30D overlap as a medication based candidate focused on IL-23 Inhibitors.', 'Included [P] New users of IL-23 inhibitors as a medication based candidate focused on IL-23 inhibitors.', 'Included [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on TNF-alpha Inhibitors, IL23 Inhibitors.'] + recommendations: + ohdsi:760 | [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap | This phenotype represents concomitant use of IL-23 and IL-12_23 inhibitors, directly addressing the study's focus on inhibitor exposure. + ohdsi:1042 | [P] New users of IL-23 inhibitors | Captures the initiation of IL-23 inhibitor use, providing context for the concurrent exposure period. + ohdsi:759 | [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap | This phenotype identifies concomitant use of TNF-alpha and IL23 inhibitors, aligning with the study intent of assessing drug combinations. + +CASE 7: Patients with allergic rhinitis + intent_facets_raw: {'condition_or_topic': 'allergic rhinitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'allergic rhinitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + planning_reasoning: ['Selected shortlisted candidates align with allergic rhinitis as a 
diagnosis-oriented study intent.', 'Included [P][R] Allergic rhinitis as a diagnosis candidate focused on Allergic rhinitis.', 'Included [P] Allergic Rhinitis as a diagnosis candidate focused on Allergic Rhinitis.', 'Included Allergic rhinitis (MAP) as a diagnosis candidate focused on Allergic Rhinitis.'] + recommendations: + ohdsi:508 | [P][R] Allergic rhinitis | This phenotype represents the diagnosis of Allergic rhinitis, primarily used for cohort definition and aligns with the study intent. + ohdsi:367 | [P] Allergic Rhinitis | This phenotype represents events of Allergic Rhinitis. Primarily intended for diagnosing this condition. + cipher:2081 | Allergic rhinitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 8: Patients with ischemic heart disease + intent_facets_raw: {'condition_or_topic': 'Ischemic heart disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Adult patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart disease'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Adults']} + intent_facets_effective: {'condition_or_topic': 'Ischemic heart disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Adult patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart disease'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Adults'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + planning_reasoning: ['Selected shortlisted candidates align with Ischemic heart disease as a diagnosis-oriented study intent.', 'Included [P][R] Ischemic heart disease as a diagnosis candidate focused on Ischemic heart disease.', 
'Included Ischemic Heart Disease (Sandhu) as a diagnosis candidate focused on Ischemic Heart Disease.', 'Included Ischemic Heart Disease (HDR UK) as a diagnosis candidate focused on Ischemic Heart Disease.'] + recommendations: + ohdsi:654 | [P][R] Ischemic heart disease | This phenotype represents the diagnosis of Ischemic heart disease, aligning directly with the study intent of patients with ischemic heart disease. + cipher:16261 | Ischemic Heart Disease (Sandhu) | This phenotype identifies patients with Ischemic Heart Disease based on ICD-10 diagnostic codes, a relevant definition for the study intent. + cipher:29560 | Ischemic Heart Disease (HDR UK) | This phenotype identifies patients diagnosed with Ischemic Heart Disease, a leading cause of morbidity and mortality. + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + intent_facets_raw: {'condition_or_topic': 'Hemorrhage, Threatened labor', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hemorrhage', 'Threatened labor'], 'care_setting_cues': ['any'], 'population_cues': ['Pregnant patients']} + intent_facets_effective: {'condition_or_topic': 'Hemorrhage, Threatened labor', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hemorrhage', 'Threatened labor'], 'care_setting_cues': ['any'], 'population_cues': ['Pregnant patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + planning_reasoning: ['Selected shortlisted candidates align with Hemorrhage, Threatened labor as a diagnosis-oriented study intent.', 'Included Early or threatened labor; hemorrhage in 
early pregnancy (MAP) as a complication candidate focused on Early or threatened labor; hemorrhage in early pregnancy.', 'Included Hemorrhage in Early Pregnancy (VADC) as a outcome candidate focused on Hemorrhage.', 'Included Hemorrhage in early pregnancy (MAP) as a diagnosis candidate focused on Hemorrhage in early pregnancy.'] + recommendations: + cipher:2643 | Early or threatened labor; hemorrhage in early pregnancy (MAP) | This phenotype directly addresses the study intent of 'Pregnant patients with hemorrhage in early pregnancy or threatened labor' by identifying a complication of early pregnancy hemorrhage. + cipher:17376 | Hemorrhage in Early Pregnancy (VADC) | This phenotype captures the broader concept of hemorrhage in early pregnancy, complementing the MAP phenotype and providing a broader understanding of the condition. + cipher:2798 | Hemorrhage in early pregnancy (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 10: Patients who underwent lung resection + intent_facets_raw: {'condition_or_topic': 'Lung Resection', 'phenotype_role': 'procedure', 'care_setting': 'inpatient|any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital inpatient'], 'population_cues': ['patients undergoing surgery']} + intent_facets_effective: {'condition_or_topic': 'Lung Resection', 'phenotype_role': 'procedure', 'care_setting': 'inpatient|any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital inpatient'], 'population_cues': ['patients undergoing surgery'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + 
planning_reasoning: ['Selected shortlisted candidates align with Lung Resection as a procedure-oriented study intent.', 'Included [P] Lung Resection, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, inpt stay, no ED as a procedure candidate focused on Lung Resection.'] + recommendations: + ohdsi:1268 | [P] Lung Resection, adults, inpt stay, no ED, post op new Afib | This phenotype directly addresses the study intent of patients who underwent lung resection and developed post-operative atrial fibrillation. + ohdsi:1308 | [P] Lung Resection, adults, post op new Afib | This phenotype aligns with the study intent, focusing on lung resection and the occurrence of post-operative atrial fibrillation. + ohdsi:869 | [P] Lung Resection, adults, inpt stay, no ED | This phenotype represents lung resection as a surgical procedure, relevant to the study's focus on patients undergoing surgical intervention. 
+ +CASE 11: Patients with laryngitis + intent_facets_raw: {'condition_or_topic': 'Laryngitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of laryngitis'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with laryngitis']} + intent_facets_effective: {'condition_or_topic': 'Laryngitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of laryngitis'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with laryngitis'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + planning_reasoning: ['Selected shortlisted candidates align with Laryngitis as a diagnosis-oriented study intent.', 'Included [P] Laryngitis as a diagnosis candidate focused on Laryngitis.', 'Included Acute laryngitis and tracheitis (gwPheWAS) as a diagnosis candidate focused on Acute laryngitis and tracheitis.', 'Included Acute laryngitis and tracheitis (MAP) as a diagnosis candidate focused on Acute laryngitis and tracheitis.'] + recommendations: + ohdsi:355 | [P] Laryngitis | This phenotype directly represents the diagnosis of Laryngitis, aligning with the study intent of patients with laryngitis. + cipher:15233 | Acute laryngitis and tracheitis (gwPheWAS) | This phenotype represents a coded definition of Acute laryngitis and tracheitis, a related condition to laryngitis, used in the Million Veteran Program, which can be relevant to the study intent. 
+ cipher:2046 | Acute laryngitis and tracheitis (MAP) | This phenotype represents a MAP-identified diagnosis of Acute laryngitis and tracheitis, based on probability thresholds. Given the study intent, it is a relevant consideration. + +CASE 12: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'Regional enteritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a gastrointestinal condition'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with inflammatory bowel disease']} + intent_facets_effective: {'condition_or_topic': 'Regional enteritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a gastrointestinal condition'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with inflammatory bowel disease'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13571', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with Regional enteritis as a diagnosis-oriented study intent.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + cipher:13571 | Regional Enteritis (Phecode) | The Phecode grouping for Regional Enteritis, based on ICD-9 and ICD-10 codes, directly aligns with the study intent of identifying patients with regional enteritis. 
+ cipher:3534 | Regional enteritis (MAP) | The MAP algorithm identifies patients with regional enteritis based on a probability cutoff, representing a robust approach to diagnosis using unsupervised clustering. + +CASE 13: Patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included Renal sclerosis, NOS (MAP) as a comorbidity covariate candidate focused on Renal Sclerosis.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This Phecode definition directly addresses renal sclerosis and provides ICD-9 and ICD-10 codes for diagnosis. + cipher:13656 | Renal Sclerosis NOS (Phecode) | Another Phecode definition specifically for renal sclerosis, offering ICD-9 and ICD-10 codes. 
+ cipher:3541 | Renal sclerosis, NOS (MAP) | MAP phenotype derived from ICD-9 and ICD-10 codes with a probability cutoff, representing renal sclerosis based on MAP algorithm output. + +CASE 14: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.'] + recommendations: + cipher:30192 | Other Cardiomyopathy (HDR UK) | This phenotype directly addresses the study intent of patients with cardiomyopathy, identified through ICD10 codes and hospitalizations. + cipher:31252 | Cardiomyopathy (Knight) | This phenotype also identifies patients with cardiomyopathy based on ICD-10, SNOMED CT and Read Codes v2, aligning with the study intent. 
+ ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype identifies patients with Takotsubo cardiomyopathy, a recognized type of cardiomyopathy, relevant to the study intent. + +CASE 15: Patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of PRES'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with PRES']} + intent_facets_effective: {'condition_or_topic': 'PRES', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of PRES'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with PRES'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | The study intent is PRES, and this phenotype directly represents the diagnosis of PRES as defined in the OHDSI Phenotype Library. 
+ +CASE 16: Patients with anorexia nervosa + intent_facets_raw: {'condition_or_topic': 'anorexia nervosa', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of anorexia nervosa'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['patients with anorexia nervosa']} + intent_facets_effective: {'condition_or_topic': 'anorexia nervosa', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of anorexia nervosa'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['patients with anorexia nervosa'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + planning_reasoning: ['Selected shortlisted candidates align with anorexia nervosa as a diagnosis-oriented study intent.', 'Included [P] Anorexia Nervosa as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia Nervosa (VADC) as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia nervosa (MAP) as a diagnosis candidate focused on Anorexia Nervosa.'] + recommendations: + ohdsi:1340 | [P] Anorexia Nervosa | This phenotype represents the diagnosis of Anorexia Nervosa, defined as events lasting 30 days, collapsed within 1 year, aligning with the study intent. + cipher:17187 | Anorexia Nervosa (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:2117 | Anorexia nervosa (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 17: Patients with dizziness, vertigo, or motion sickness + intent_facets_raw: {'condition_or_topic': 'dizziness vertigo motion sickness', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['dizziness', 'vertigo', 'motion sickness'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'dizziness vertigo motion sickness', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['dizziness', 'vertigo', 'motion sickness'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:893', 'ohdsi:244', 'cipher:3402'] + planning_reasoning: ['Selected shortlisted candidates align with dizziness vertigo motion sickness as a diagnosis-oriented study intent.', 'Included [P] Vertigo as a diagnosis candidate focused on Vertigo.', 'Included [P] Dizziness or giddiness including motion sickness and vertigo as a unknown candidate focused on Dizziness.', 'Included Peripheral or central vertigo (MAP) as a diagnosis candidate focused on Vertigo.'] + recommendations: + ohdsi:893 | [P] Vertigo | The phenotype is specifically defined as vertigo, a key component of the study intent. + ohdsi:244 | [P] Dizziness or giddiness including motion sickness and vertigo | This phenotype directly represents symptoms of dizziness, including motion sickness and vertigo, aligning with the study intent. + cipher:3402 | Peripheral or central vertigo (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 18: Patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'Polymyalgia Rheumatica', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'elderly adults', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a rheumatological condition'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Elderly adults']} + intent_facets_effective: {'condition_or_topic': 'Polymyalgia Rheumatica', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'elderly adults', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a rheumatological condition'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Elderly adults'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + planning_reasoning: ['Selected shortlisted candidates align with Polymyalgia Rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (MAP) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD-10 codes, Med Codes, and Read codes v2, directly aligning with the study intent of identifying patients with Polymyalgia Rheumatica. + cipher:13992 | Polymyalgia Rheumatica (Phecode) | This phenotype uses Phecode, a mapping of ICD codes to clinically relevant phenotypes, suitable for diagnosis of Polymyalgia Rheumatica. 
+ cipher:3460 | Polymyalgia Rheumatica (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + intent_facets_raw: {'condition_or_topic': 'corticosteroid adverse effects', 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'corticosteroid adverse effects', 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + planning_reasoning: ['Selected shortlisted candidates align with corticosteroid adverse effects as a outcome-oriented study intent.', 'Included Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Adrenal Cortical Steroids Adverse Effects.', 'Included Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Lipid-Lowering Drug Adverse Effects.', 'Included Salicylates causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Salicylates adverse effects.'] + recommendations: + cipher:2064 | Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) | This phenotype directly addresses the study intent of identifying patients with adverse effects from therapeutic corticosteroid use, based on a MAP clustering algorithm. 
+ cipher:2123 | Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + cipher:3573 | Salicylates causing adverse effects in therapeutic use (MAP) | Salicylates are a type of corticosteroid, and this phenotype identifies patients experiencing adverse effects from their use, aligning with the study intent through a MAP clustering approach. + +CASE 20: Patients with low blood pressure + intent_facets_raw: {'condition_or_topic': 'Hypotension', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Blood pressure diagnosis'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'Hypotension', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Blood pressure diagnosis'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['adult patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + planning_reasoning: ['Selected shortlisted candidates align with Hypotension as a diagnosis-oriented study intent.', 'Included [P][R] Low blood pressure as a severity candidate focused on Hypotension.', 'Included [P] Hypotension as a severity candidate focused on Hypotension.', 'Included Hypotension Nos (Phecode) as a diagnosis candidate focused on Hypotension.'] + recommendations: + ohdsi:339 | [P][R] Low blood pressure | This phenotype directly addresses the study intent of patients with low blood pressure, measuring severity. 
+ ohdsi:890 | [P] Hypotension | This phenotype is a measure of hypotension, which is closely related to low blood pressure and could be a relevant covariate. + cipher:13390 | Hypotension Nos (Phecode) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 21: Patients with encephalopathy + intent_facets_raw: {'condition_or_topic': 'Encephalopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Diagnosis of encephalopathy'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with encephalopathy']} + intent_facets_effective: {'condition_or_topic': 'Encephalopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Diagnosis of encephalopathy'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with encephalopathy'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with Encephalopathy as a diagnosis-oriented study intent.', 'Included [P] Encephalopathy or its presentations as a diagnosis candidate focused on Encephalopathy.', 'Included Encephalopathy, not elsewhere classified (MAP) as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Encephalopathy as a diagnosis candidate focused on Encephalopathy.'] + recommendations: + ohdsi:194 | [P] Encephalopathy or its presentations | This phenotype directly addresses the study intent of patients with encephalopathy, representing the clinical identification of the condition. 
+ cipher:2664 | Encephalopathy, not elsewhere classified (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + ohdsi:331 | [P] Encephalopathy | This phenotype represents the clinical diagnosis of Encephalopathy, aligning with the study intent and offering a standard definition of the condition. + +CASE 22: Patients with birdshot chorioretinitis + intent_facets_raw: {'condition_or_topic': 'Birdshot chorioretinitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a retinal condition'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with retinal disease']} + intent_facets_effective: {'condition_or_topic': 'Birdshot chorioretinitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a retinal condition'], 'care_setting_cues': ['All care settings'], 'population_cues': ['Patients with retinal disease'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1223'] + planning_reasoning: ['Selected shortlisted candidates align with Birdshot chorioretinitis as a diagnosis-oriented study intent.', 'Included [P] Birdshot chorioretinitis as a diagnosis candidate focused on Uveitis.'] + recommendations: + ohdsi:1223 | [P] Birdshot chorioretinitis | This phenotype directly addresses the study intent of 'Patients with birdshot chorioretinitis' as a diagnosis of a retinal condition related to uveitis. 
+ +CASE 23: Older adults with macular degeneration + intent_facets_raw: {'condition_or_topic': 'macular degeneration', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of macular degeneration'], 'care_setting_cues': ['any care setting'], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'macular degeneration', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of macular degeneration'], 'care_setting_cues': ['any care setting'], 'population_cues': ['older adults'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + planning_reasoning: ['Selected shortlisted candidates align with macular degeneration as a diagnosis-oriented study intent.', 'Included Macular Degeneration (HDR UK) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration (senile) of retina NOS (MAP) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration, dry (MAP) as a diagnosis candidate focused on Macular Degeneration.'] + recommendations: + cipher:30295 | Macular Degeneration (HDR UK) | This phenotype directly addresses the study intent of older adults with macular degeneration based on HDR UK criteria. + cipher:3006 | Macular degeneration (senile) of retina NOS (MAP) | This phenotype identifies patients with macular degeneration based on a MAP phenotype, which aligns with the study's focus on older adults and the diagnosis of this condition. 
+ cipher:3005 | Macular degeneration, dry (MAP) | This phenotype identifies patients with dry macular degeneration based on a MAP probability score, fitting the study's description of older adults and the diagnosis of this condition. + +CASE 24: Patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'Autoimmune hemolytic anemia', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Autoimmune hemolytic anemia', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with Autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | Captures the earliest diagnosis of Warm Autoimmune Hemolytic Anemia, aligning with the study's focus. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly addresses the study intent of identifying patients with autoimmune hemolytic anemia. 
+ cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | Provides an alternative OHDSI phenotype for autoimmune hemolytic anemia. + +CASE 25: Patients with MSI-low rectal adenocarcinoma + intent_facets_raw: {'condition_or_topic': 'colorectal cancer', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'cancer patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Rectal adenocarcinoma'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients with colorectal cancer']} + intent_facets_effective: {'condition_or_topic': 'colorectal cancer', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'cancer patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Rectal adenocarcinoma'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients with colorectal cancer'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819'] + planning_reasoning: ['Selected shortlisted candidates align with colorectal cancer as a diagnosis-oriented study intent.', 'Included [P] Primary adenocarcinoma of colon MSI-H or dMMR, no surgery or oncological treatment as a diagnosis candidate focused on colorectal cancer.', 'Included [P] Primary adenocarcinoma of colon or rectum, MSI-H or dMMR, oncological treatment no surgery as a diagnosis candidate focused on Colorectal Cancer.', 'Included [P] Primary adenocarcinoma of the colon or rectum, MSI-L, MSI-indeterminate, MSS or pMMR, treated with curative intended surgery as a diagnosis candidate focused on Colorectal Cancer.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:836, ohdsi:823, ohdsi:819.'] + recommendations: + ohdsi:836 | [P] Primary adenocarcinoma of colon MSI-H or dMMR, no surgery or oncological 
treatment | This phenotype directly addresses the study intent of patients with MSI-low rectal adenocarcinoma by capturing the primary diagnosis with the specified MSI-H/dMMR status and excluding curative surgery + ohdsi:823 | [P] Primary adenocarcinoma of colon or rectum, MSI-H or dMMR, oncological treatment no surgery | This phenotype represents another relevant diagnosis of colorectal cancer with MSI-H or dMMR status, incorporating oncological treatment but excluding surgery, aligning with the study's focus. + ohdsi:819 | [P] Primary adenocarcinoma of the colon or rectum, MSI-L, MSI-indeterminate, MSS or pMMR, treated with curative intended surgery | While focused on surgery, this phenotype captures patients with MSI-L or MSS status, which are relevant variations within the broader colorectal cancer spectrum, providing an additional layer of chara + +CASE 26: Patients with blistering skin lesions + intent_facets_raw: {'condition_or_topic': 'Blistering skin lesions', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Blistering skin lesions - Primary diagnosis'], 'care_setting_cues': ['any - Broad care setting'], 'population_cues': ['unknown - No specific population defined']} + intent_facets_effective: {'condition_or_topic': 'Blistering skin lesions', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Blistering skin lesions - Primary diagnosis'], 'care_setting_cues': ['any - Broad care setting'], 'population_cues': ['unknown - No specific population defined'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:652', 'ohdsi:376'] + planning_reasoning: ['Selected shortlisted candidates align with 
Blistering skin lesions as a diagnosis-oriented study intent.', 'Included [P][R] Vasculitis of the skin as a diagnosis candidate focused on Vasculitis of the skin.', 'Included [P][R] Bleeding skin as a outcome candidate focused on Bleeding Skin.'] + recommendations: + ohdsi:652 | [P][R] Vasculitis of the skin | This phenotype represents events of Vasculitis of the skin, which aligns with the study intent of 'Patients with blistering skin lesions'. + ohdsi:376 | [P][R] Bleeding skin | This phenotype represents events of Bleeding Skin, a relevant outcome related to blistering skin lesions. + +CASE 27: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:13516', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and Mucositis (Ulcerative) (Phecode) as a comorbidity covariate candidate focused on Stomatitis and Mucositis.'] + recommendations: + cipher:17298 | Stomatitis and Mucositis 
(Ulcerative) (VADC) | This phenotype directly addresses the study intent of patients with stomatitis or mucositis, as defined in the VA Data Commons. + cipher:15333 | Stomatitis and mucositis (gwPheWAS) | This phenotype also relates to stomatitis and mucositis and aligns with the study intent. + cipher:13516 | Stomatitis and Mucositis (Ulcerative) (Phecode) | This phenotype represents Stomatitis and Mucositis and aligns with the study intent of identifying patients with this condition. + +CASE 28: Patients with neurofibromatosis type 1 + intent_facets_raw: {'condition_or_topic': 'Neurofibromatosis type 1', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a specific condition'], 'care_setting_cues': ['General patient population'], 'population_cues': ['Patients with Neurofibromatosis type 1']} + intent_facets_effective: {'condition_or_topic': 'Neurofibromatosis type 1', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a specific condition'], 'care_setting_cues': ['General patient population'], 'population_cues': ['Patients with Neurofibromatosis type 1'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + planning_reasoning: ['Selected shortlisted candidates align with Neurofibromatosis type 1 as a diagnosis-oriented study intent.', 'Included [P][R] Neurofibromatosis type 1 as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 without Type 2 (FP) as a diagnosis candidate focused on 
Neurofibromatosis type 1.'] + recommendations: + ohdsi:697 | [P][R] Neurofibromatosis type 1 | This phenotype represents the diagnosis of Neurofibromatosis type 1 (NF1), aligning directly with the study intent of patients with Neurofibromatosis type 1. + ohdsi:304 | Neurofibromatosis type 1 (FP) | This phenotype represents the diagnosis of Neurofibromatosis type 1 (NF1), a relevant and frequently used diagnosis in this patient population. + ohdsi:305 | Neurofibromatosis type 1 without Type 2 (FP) | This phenotype identifies individuals with Neurofibromatosis type 1, excluding those with NF2 or related auditory complications, providing a refined selection for this specific patient cohort. + +CASE 29: Patients with keloid scars + intent_facets_raw: {'condition_or_topic': 'Keloid scars', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with keloid scars', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Scar tissue'], 'care_setting_cues': ['General healthcare'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'Keloid scars', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with keloid scars', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Scar tissue'], 'care_setting_cues': ['General healthcare'], 'population_cues': ['Patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + planning_reasoning: ['Selected shortlisted candidates align with Keloid scars as a diagnosis-oriented study intent.', 'Included Keloid Scar (Phecode) as a diagnosis candidate focused on Keloid Scar.', 'Included Keloid scar (gwPheWAS) as a comorbidity covariate candidate focused on Keloid Scar.', 'Included Keloid scar (MAP) as a comorbidity 
covariate candidate focused on Keloid Scar.'] + recommendations: + cipher:13930 | Keloid Scar (Phecode) | Directly reflects the study intent of patients with keloid scars, based on the Phecode definition using ICD-9 and ICD-10 codes. + cipher:15610 | Keloid scar (gwPheWAS) | Represents a comorbidity/covariate related to Keloid Scar, utilized in the Million Veteran Program GWAS study. + cipher:2950 | Keloid scar (MAP) | Identifies patients with Keloid Scar based on a MAP algorithm and probability cutoff, suitable for comorbidity covariate analysis. + +CASE 30: Patients with acetaminophen exposure + intent_facets_raw: {'condition_or_topic': 'acetaminophen exposure', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'acetaminophen exposure', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen exposure as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.', 'Included [P] Acamprosate, all exposures as a medication based candidate focused on Acamprosate Exposure.', 'Included [P] Disulfiram, all exposures as a medication based candidate focused on Disulfiram Exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen 
exposure 10 | This phenotype directly reflects the study intent of examining acetaminophen exposure, including a 30-day persistence window. + ohdsi:1427 | [P] Acamprosate, all exposures | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1428 | [P] Disulfiram, all exposures | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 31: Patients exposed to rifamycin antibiotics + intent_facets_raw: {'condition_or_topic': 'Rifamycin antibiotic exposure', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Medication'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'Rifamycin antibiotic exposure', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Medication'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1211', 'ohdsi:1206'] + planning_reasoning: ['Selected shortlisted candidates align with Rifamycin antibiotic exposure as a medication based-oriented study intent.', 'Included [P] Antibiotics Rifamycins 10 as a medication based candidate focused on Rifamycins.', 'Included [P] Antibiotics Macrolides 10 as a medication based candidate focused on Macrolide Drug Exposure.'] + recommendations: + ohdsi:1211 | [P] Antibiotics Rifamycins 10 | This phenotype directly represents exposure to rifamycin antibiotics, aligning with the study intent of patients exposed to these medications. 
+ ohdsi:1206 | [P] Antibiotics Macrolides 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 32: Patients with a joint or ligament sprain + intent_facets_raw: {'condition_or_topic': 'Sprain', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Sprain represents the primary condition of interest.'], 'care_setting_cues': ['any - reflects the broad scope of patient care settings'], 'population_cues': ['patients - defines the target population for the study']} + intent_facets_effective: {'condition_or_topic': 'Sprain', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Sprain represents the primary condition of interest.'], 'care_setting_cues': ['any - reflects the broad scope of patient care settings'], 'population_cues': ['patients - defines the target population for the study'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + planning_reasoning: ['Selected shortlisted candidates align with Sprain as a diagnosis-oriented study intent.', 'Included Joint Ligament Sprain (Phecode) as a diagnosis candidate focused on Joint Ligament Sprain.', 'Included Rotator cuff (capsule) sprain (MAP) as a diagnosis candidate focused on Rotator cuff (capsule) sprain.', 'Included Joint-ligament sprain (MAP) as a comorbidity covariate candidate focused on Ligament sprain.'] + recommendations: + cipher:14236 | Joint Ligament Sprain (Phecode) | Directly represents 'Joint or ligament sprain' as defined by the Phecode mapping, a clinically relevant diagnosis based on ICD codes. 
+ cipher:3569 | Rotator cuff (capsule) sprain (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:2944 | Joint-ligament sprain (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 33: Pregnant patients with miscarriage or stillbirth + intent_facets_raw: {'condition_or_topic': 'Miscarriage', 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Pregnant women']} + intent_facets_effective: {'condition_or_topic': 'Miscarriage', 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Pregnant women'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:627', 'cipher:3056', 'cipher:15565', 'cipher:13818'] + planning_reasoning: ['Selected shortlisted candidates align with Miscarriage as a outcome-oriented study intent.', 'Included [P][R] Miscarriage as a outcome candidate focused on Miscarriage.', 'Included Miscarriage; stillbirth (MAP) as a outcome candidate focused on Miscarriage; Stillbirth.', 'Included Miscarriage; stillbirth (gwPheWAS) as a outcome candidate focused on Miscarriage, Stillbirth.'] + recommendations: + ohdsi:627 | [P][R] Miscarriage | This phenotype directly represents the event of miscarriage, aligning with the study intent of pregnant patients with miscarriage or stillbirth. + cipher:3056 | Miscarriage; stillbirth (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned outcome match. 
+ cipher:15565 | Miscarriage; stillbirth (gwPheWAS) | Selected from the top reranked shortlisted candidates as a clinically aligned outcome match. + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + intent_facets_raw: {'condition_or_topic': 'Arterial embolism or thrombosis', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'Lower extremity', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Lower extremity arterial event'], 'care_setting_cues': ['Hospitalized patient', 'Emergency department'], 'population_cues': ['Lower limb', 'Peripheral vasculature']} + intent_facets_effective: {'condition_or_topic': 'Arterial embolism or thrombosis', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'Lower extremity', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Lower extremity arterial event'], 'care_setting_cues': ['Hospitalized patient', 'Emergency department'], 'population_cues': ['Lower limb', 'Peripheral vasculature'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13354', 'cipher:2142'] + planning_reasoning: ['Selected shortlisted candidates align with Arterial embolism or thrombosis as a diagnosis-oriented study intent.', 'Included Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) as a complication candidate focused on Arterial Embolism and Thrombosis.', 'Included Arterial embolism and thrombosis of lower extremity artery (MAP) as a complication candidate focused on Arterial Embolism and Thrombosis.'] + recommendations: + cipher:13354 | Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) | Directly maps to the study intent: Patients with arterial embolism or thrombosis of a lower extremity artery. 
+ cipher:2142 | Arterial embolism and thrombosis of lower extremity artery (MAP) | Represents arterial embolism or thrombosis of the lower extremity artery as defined by the MAP algorithm, providing a probability-based assessment. + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + intent_facets_raw: {'condition_or_topic': 'Urinary Tract Infection', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users of cephalosporins', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['new medication use']} + intent_facets_effective: {'condition_or_topic': 'Urinary Tract Infection', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users of cephalosporins', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['new medication use'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1301', 'ohdsi:1186', 'ohdsi:410', 'cipher:31223'] + planning_reasoning: ['Selected shortlisted candidates align with Urinary Tract Infection as a diagnosis-oriented study intent.', 'Included [P] Acute Urinary tract infections UTI events as a diagnosis candidate focused on Urinary Tract Infection.', 'Included [P] Urinary tract infectious 10 as a diagnosis candidate focused on Urinary Tract Infection.', 'Included [P] Acute Urinary tract infections UTI as a diagnosis candidate focused on Urinary Tract Infection.'] + recommendations: + ohdsi:1301 | [P] Acute Urinary tract infections UTI events | This phenotype directly addresses the study intent of patients with urinary tract infections and new cephalosporin use. 
+ ohdsi:1186 | [P] Urinary tract infectious 10 | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + ohdsi:410 | [P] Acute Urinary tract infections UTI | This phenotype is a broader capture of UTI diagnoses, which is relevant to the study intent. + +CASE 36: Patients hospitalized with preinfarction syndrome + intent_facets_raw: {'condition_or_topic': 'Preinfarction syndrome', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Diagnosis of preinfarction syndrome'], 'care_setting_cues': ['Inpatient setting'], 'population_cues': ['Patients in hospitals']} + intent_facets_effective: {'condition_or_topic': 'Preinfarction syndrome', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Diagnosis of preinfarction syndrome'], 'care_setting_cues': ['Inpatient setting'], 'population_cues': ['Patients in hospitals'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1081'] + planning_reasoning: ['Selected shortlisted candidates align with Preinfarction syndrome as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype directly addresses Acute Myocardial Infarction, a key component of the study intent (preinfarction syndrome). 
+ +CASE 37: Patients with a personal history of blood or blood-forming organ disease + intent_facets_raw: {'condition_or_topic': 'Blood or blood-forming organ disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of blood or blood-forming organ disease'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Patients with documented personal history']} + intent_facets_effective: {'condition_or_topic': 'Blood or blood-forming organ disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of blood or blood-forming organ disease'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Patients with documented personal history'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:17130', 'cipher:3412'] + planning_reasoning: ['Selected shortlisted candidates align with Blood or blood-forming organ disease as a diagnosis-oriented study intent.', 'Included Personal History of Diseases of Blood and Bloodforming Organs (VADC) as a comorbidity covariate candidate focused on Personal History of Blood and Bloodforming Organ Diseases.', 'Included Personal history of diseases of blood and blood-forming organs (MAP) as a comorbidity covariate candidate focused on Personal history of diseases of blood and blood-forming organs.'] + recommendations: + cipher:17130 | Personal History of Diseases of Blood and Bloodforming Organs (VADC) | This phenotype directly addresses the study intent of patients with a personal history of blood or blood-forming organ disease. 
+ cipher:3412 | Personal history of diseases of blood and blood-forming organs (MAP) | The MAP phenotype identifies patients based on ICD codes indicative of a personal history of blood disorders, aligning with the study intent. + +CASE 38: Patients with benign pancreatic conditions + intent_facets_raw: {'condition_or_topic': 'pancreatic conditions', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'pancreatic conditions', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + planning_reasoning: ['Selected shortlisted candidates align with pancreatic conditions as a diagnosis-oriented study intent.', 'Included Pancreas Transplant Recipient (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Conditions.', 'Included Extrahepatic Cholangiocarcinoma (Nguyen) as a diagnosis candidate focused on Pancreatic Cancer.', 'Included Chronic Pancreatitis (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Inflammation.'] + recommendations: + cipher:16954 | Pancreas Transplant Recipient (Nguyen) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + cipher:16952 | Extrahepatic Cholangiocarcinoma (Nguyen) | This phenotype identifies patients with pancreatic cancer based on ICD-10 codes, aligning with the study intent of investigating prevalence among veterans. 
+ cipher:16953 | Chronic Pancreatitis (Nguyen) | Identifies veterans with pancreatic inflammation, which is relevant as a covariate for studying pancreatic cancer prevalence. + +CASE 39: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|any', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['outpatient', 'any'], 'population_cues': ['localized']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|any', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis'], 'care_setting_cues': ['outpatient', 'any'], 'population_cues': ['localized'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:3192', 'cipher:4029', 'cipher:3190', 'cipher:4399'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Thumb Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Osteoarthritis; localized (MAP) as a diagnosis candidate focused on Osteoarthritis localized.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff, aligning directly with the study intent of primary localized osteoarthritis. 
+ cipher:4029 | Thumb Osteoarthritis (MVP) | Identifies patients with primary osteoarthritis of the thumb, which may be relevant to understanding osteoarthritis progression, and aligns with the study’s focus on localized osteoarthritis. + cipher:3190 | Osteoarthritis; localized (MAP) | Represents localized osteoarthritis based on MAP unsupervised clustering of ICD codes, primarily used for diagnosis, fitting the study intent. + +CASE 40: New users of dihydropyridine calcium channel blockers + intent_facets_raw: {'condition_or_topic': 'Calcium channel blockers', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Medication use'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['New patients']} + intent_facets_effective: {'condition_or_topic': 'Calcium channel blockers', 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Medication use'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['New patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + planning_reasoning: ['Selected shortlisted candidates align with Calcium channel blockers as a medication based-oriented study intent.', 'Included [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of dihydropyridine calcium channel blockers as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of Beta blockers nested in Acute Myocardial Infarction as a 
medication based candidate focused on Beta Blockers.'] + recommendations: + ohdsi:1047 | [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension | This phenotype directly addresses the study intent of identifying patients newly prescribed dihydropyridine calcium channel blockers for hypertension management. + ohdsi:1048 | [P] New users of dihydropyridine calcium channel blockers | This phenotype captures the core concept of new users of dihydropyridine calcium channel blockers. + ohdsi:1052 | [P] New users of Beta blockers nested in Acute Myocardial Infarction | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 41: Veteran patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['veteran patients']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['veteran patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included 
Renal Sclerosis NOS (VADC) as a comorbidity covariate candidate focused on Renal Sclerosis.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This Phecode definition directly addresses renal sclerosis and utilizes ICD-9 and ICD-10 codes, aligning with the study intent of identifying veteran patients with this condition. + cipher:13656 | Renal Sclerosis NOS (Phecode) | Another Phecode definition focused on renal sclerosis, providing an alternative representation of the condition using ICD-9 and ICD-10 codes. + cipher:17322 | Renal Sclerosis NOS (VADC) | This phenotype definition, derived from the VA Data Commons, specifically identifies renal sclerosis as a comorbidity covariate using ICD-9 and ICD-10 codes, relevant to the study population. + +CASE 42: Veteran patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'Polymyalgia rheumatica', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Diagnosis of Polymyalgia Rheumatica'], 'care_setting_cues': ['Any Care Setting'], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'Polymyalgia rheumatica', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Diagnosis of Polymyalgia Rheumatica'], 'care_setting_cues': ['Any Care Setting'], 'population_cues': ['Veterans'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453', 'cipher:3460'] + planning_reasoning: ['Selected shortlisted candidates align with Polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate 
focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (VADC) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD-10 codes, which aligns with the study intent of identifying patients with Polymyalgia Rheumatica. + cipher:13992 | Polymyalgia Rheumatica (Phecode) | The Phecode phenotype definition using ICD-9 and ICD-10 codes is relevant for identifying patients with Polymyalgia Rheumatica. + cipher:17453 | Polymyalgia Rheumatica (VADC) | This phenotype, derived from the VA Data Commons, represents a comorbidity/covariate defined by ICD codes, suitable for studying Polymyalgia Rheumatica within the Million Veteran Program. + +CASE 43: Veteran patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'Autoimmune hemolytic anemia', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Diagnosis of a specific blood disorder'], 'care_setting_cues': ['General medical care'], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'Autoimmune hemolytic anemia', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Diagnosis of a specific blood disorder'], 'care_setting_cues': ['General medical care'], 'population_cues': ['Veterans'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:12888', 'ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with Autoimmune hemolytic 
anemia as a diagnosis-oriented study intent.', 'Included Autoimmune Hemolytic Anemias (Phecode) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + cipher:12888 | Autoimmune Hemolytic Anemias (Phecode) | This Phecode specifically defines Autoimmune Hemolytic Anemia based on ICD codes, aligning with the study's focus and use of PheWAS. + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | This phenotype identifies the earliest diagnosis of Warm Autoimmune Hemolytic Anemia (wAIHA), directly addressing the study intent of Veteran patients with this condition. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype identifies patients with Autoimmune hemolytic anemia, a closely related condition and relevant to the study intent. 
+ +CASE 44: Veteran patients with cardiac complications + intent_facets_raw: {'condition_or_topic': 'cardiac complications', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['Veteran patients']} + intent_facets_effective: {'condition_or_topic': 'cardiac complications', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['diagnosis'], 'care_setting_cues': ['any'], 'population_cues': ['Veteran patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:15819', 'cipher:17258'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac complications as a diagnosis-oriented study intent.', 'Included Complications of cardiac/vascular device, implant, and graft (gwPheWAS) as a complication candidate focused on Cardiac/Vascular Device Complications.', 'Included Cardiac Complications Not Elsewhere Classified (VADC) as a complication candidate focused on Cardiac Complications.'] + recommendations: + cipher:15819 | Complications of cardiac/vascular device, implant, and graft (gwPheWAS) | This phenotype directly addresses cardiac complications, aligning with the study intent of veteran patients with cardiac complications. + cipher:17258 | Cardiac Complications Not Elsewhere Classified (VADC) | This phenotype captures a broad range of cardiac complications, providing a comprehensive view of the patient’s condition, relevant to the study intent. 
+ +CASE 45: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of fasciitis'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with fasciitis']} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of fasciitis'], 'care_setting_cues': ['any care setting'], 'population_cues': ['patients with fasciitis'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:14029 | Fasciitis (Phecode) | This phenotype is defined using the Phecode mapping system, which maps ICD codes to clinically relevant diagnoses of Fasciitis. + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS. + cipher:2703 | Fasciitis (MAP) | This phenotype identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold. It is based on unsupervised clustering and may require caution. 
+ +CASE 46: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype directly addresses the study intent of patients with stomatitis or mucositis. + cipher:15333 | Stomatitis and mucositis (gwPheWAS) | This phenotype also represents stomatitis and mucositis, a relevant complication, aligning with the study intent. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 47: Patients with Barretts esophagus + intent_facets_raw: {'condition_or_topic': "Barrett's esophagus", 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ["diagnosis of Barrett's esophagus"], 'care_setting_cues': ['any clinical setting'], 'population_cues': ["patients with Barrett's esophagus"]} + intent_facets_effective: {'condition_or_topic': "Barrett's esophagus", 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ["diagnosis of Barrett's esophagus"], 'care_setting_cues': ['any clinical setting'], 'population_cues': ["patients with Barrett's esophagus"], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13531', 'cipher:2187', 'cipher:15342', 'cipher:30228'] + planning_reasoning: ["Selected shortlisted candidates align with Barrett's esophagus as a diagnosis-oriented study intent.", "Included Barretts Esophagus (Phecode) as a diagnosis candidate focused on Barrett's Esophagus.", "Included Barrett's esophagus (MAP) as a diagnosis candidate focused on Barrett's esophagus.", "Included Barrett's esophagus (gwPheWAS) as a diagnosis candidate focused on Barrett's esophagus."] + recommendations: + cipher:13531 | Barretts Esophagus (Phecode) | This phenotype represents Barrett's Esophagus based on ICD-9 and ICD-10 codes, aligning directly with the study intent and is a primary diagnosis phenotype. + cipher:2187 | Barrett's esophagus (MAP) | This phenotype identifies Barrett's esophagus using a MAP algorithm based on ICD codes and healthcare utilization, suitable for classification based on probability thresholds. 
+ cipher:15342 | Barrett's esophagus (gwPheWAS) | This phenotype is defined using ICD codes from the Million Veteran Program phenome-wide GWAS, representing a clinically relevant phenotype for Barrett's Esophagus. + +CASE 48: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'Regional enteritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Inflammatory Bowel Disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Regional enteritis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Inflammatory Bowel Disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13571', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with Regional enteritis as a diagnosis-oriented study intent.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + cipher:13571 | Regional Enteritis (Phecode) | This phenotype aligns directly with the study intent of patients with regional enteritis, utilizing the Phecode grouping for ICD-9 and ICD-10 codes. + cipher:3534 | Regional enteritis (MAP) | This phenotype identifies patients with regional enteritis based on a MAP algorithm probability cutoff, providing an alternative approach to diagnosis. 
+ +CASE 49: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|any', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of osteoarthritis'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['localized osteoarthritis']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|any', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of osteoarthritis'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['localized osteoarthritis'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Thumb Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff of 0.41, aligning directly with the study intent of patients with primary localized osteoarthritis. + cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis, specifically focusing on finger OA, which is relevant to localized osteoarthritis and the specified study intent. 
+ cipher:4029 | Thumb Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis of the thumb, which is relevant to localized osteoarthritis and the specified study intent. + +CASE 50: Patients with aortic valve disease + intent_facets_raw: {'condition_or_topic': 'aortic valve disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'aortic valve disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': [], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + planning_reasoning: ['Selected shortlisted candidates align with aortic valve disease as a diagnosis-oriented study intent.', 'Included Aortic Valve Disease (Phecode) as a diagnosis candidate focused on Aortic Valve Disease.', 'Included Nonrheumatic Aortic Valve Disorders (Phecode) as a diagnosis candidate focused on Aortic Valve Disorders.', 'Included Nonrheumatic Aortic Valve Disorders (HDR UK) as a diagnosis candidate focused on Aortic Valve Disorders.'] + recommendations: + cipher:13226 | Aortic Valve Disease (Phecode) | This phenotype directly addresses the study intent of patients with aortic valve disease, based on ICD codes. + cipher:13231 | Nonrheumatic Aortic Valve Disorders (Phecode) | This phenotype aligns with the study intent, focusing on nonrheumatic aortic valve disorders. 
+ cipher:30301 | Nonrheumatic Aortic Valve Disorders (HDR UK) | This phenotype identifies patients with nonrheumatic aortic valve disorders using HDR UK criteria and ICD-10 codes, relevant to the study's focus. + +CASE 51: Patients with chronic periodontitis + intent_facets_raw: {'condition_or_topic': 'Chronic periodontitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with periodontal disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of periodontal disease'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients affected by gum disease']} + intent_facets_effective: {'condition_or_topic': 'Chronic periodontitis', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with periodontal disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of periodontal disease'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients affected by gum disease'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + planning_reasoning: ['Selected shortlisted candidates align with Chronic periodontitis as a diagnosis-oriented study intent.', 'Included Chronic Periodontitis (Phecode) as a diagnosis candidate focused on Chronic Periodontitis.', 'Included Periodontitis (acute or chronic) (MAP) as a diagnosis candidate focused on Periodontitis.', 'Included Chronic periodontitis (gwPheWAS) as a comorbidity covariate candidate focused on Chronic Periodontitis.'] + recommendations: + cipher:13494 | Chronic Periodontitis (Phecode) | This phenotype directly aligns with the study intent of 'Patients with chronic periodontitis' as defined using ICD codes. 
+ cipher:3397 | Periodontitis (acute or chronic) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:15317 | Chronic periodontitis (gwPheWAS) | This phenotype is a well-established definition of Chronic Periodontitis utilizing ICD codes and was used in a large-scale GWAS, making it relevant to the study intent. + +CASE 52: Patients with hypertensive chronic kidney disease + intent_facets_raw: {'condition_or_topic': 'Hypertensive chronic kidney disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of chronic kidney disease'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'Hypertensive chronic kidney disease', 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of chronic kidney disease'], 'care_setting_cues': ['Any clinical setting'], 'population_cues': ['Adult patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'cipher:13240'] + planning_reasoning: ['Selected shortlisted candidates align with Hypertensive chronic kidney disease as a diagnosis-oriented study intent.', 'Included [P] Kidney disease as a diagnosis candidate focused on Chronic Kidney Disease.', 'Included Hypertensive chronic kidney disease (MAP) as a comorbidity covariate candidate focused on Hypertensive Chronic Kidney Disease.', 'Included [P] Chronic kidney disease or end stage renal disease 10 as a comorbidity covariate candidate focused on Chronic Kidney Disease.'] + recommendations: + ohdsi:923 | [P] Kidney disease | This phenotype 
represents the initial record of Chronic Kidney Disease, aligning with the study intent of patients with hypertensive chronic kidney disease. + cipher:2846 | Hypertensive chronic kidney disease (MAP) | This phenotype identifies patients classified as having Hypertensive Chronic Kidney Disease based on a MAP algorithm, which is relevant to the study intent. + ohdsi:1191 | [P] Chronic kidney disease or end stage renal disease 10 | This phenotype captures Chronic Kidney Disease as a covariate for predicting outcomes, aligning with the study intent. + +CASE 53: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a structural heart condition'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Adult Patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of a structural heart condition'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Adult Patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30174', 'cipher:30192'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.', 'Included Dilated Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Dilated 
Cardiomyopathy.'] + recommendations: + cipher:31252 | Cardiomyopathy (Knight) | This phenotype identifies patients with Cardiomyopathy based on ICD-10, SNOMED CT and Read Codes v2, aligning with the study intent. + ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype directly addresses Takotsubo cardiomyopathy, a recognized type of cardiomyopathy. + cipher:30174 | Dilated Cardiomyopathy (HDR UK) | This phenotype identifies patients with Dilated Cardiomyopathy based on ICD-10 diagnoses or hospitalizations, representing a core clinical condition related to the study intent. + +CASE 54: Patients with scleritis or episcleritis + intent_facets_raw: {'condition_or_topic': 'Scleritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Adult patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Scleritis is a diagnosis'], 'care_setting_cues': ['Outpatient care setting'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'Scleritis', 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Adult patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Scleritis is a diagnosis'], 'care_setting_cues': ['Outpatient care setting'], 'population_cues': ['Adult patients'], 'clinical_topic_aliases': []} + planning_shortlist: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + planning_reasoning: ['Selected shortlisted candidates align with Scleritis as a diagnosis-oriented study intent.', 'Included Scleritis and Episcleritis (HDR UK) as a diagnosis candidate focused on Scleritis and Episcleritis.', 'Included Scleritis and episcleritis (MAP) as a diagnosis candidate focused on Scleritis and episcleritis.', 'Included Scleritis and Episcleritis (Phecode) as a diagnosis candidate focused on 
Scleritis and Episcleritis.'] + recommendations: + cipher:30069 | Scleritis and Episcleritis (HDR UK) | This phenotype directly aligns with the study intent of patients with scleritis or episcleritis, utilizing the HDR UK criteria. + cipher:3581 | Scleritis and episcleritis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:13186 | Scleritis and Episcleritis (Phecode) | This phenotype utilizes the Phecode mapping, based on ICD codes, which is consistent with the study's focus on scleritis and episcleritis diagnosis. + +CASE 1: Patients with an implanted cardiac defibrillator + cipher:15146 | 20.7792 | [('topic_primary', 'Cardiac Defibrillator')] + cipher:2288 | 13.3392 | [('topic_primary', 'Cardiac defibrillator in situ')] + cipher:13288 | 13.2992 | [('topic_primary', 'Cardiac Defibrillator in Situ')] + ohdsi:1102 | -5.65 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + ohdsi:1314 | -5.67 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:875 | -5.71 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + cipher:16289 | -5.8875 | [('topic_mismatch', 'Bleeding')] + cipher:30773 | -5.8875 | [('topic_mismatch', 'Trifascicular Block')] + +CASE 2: Patients diagnosed with fasciitis + cipher:15684 | 35.6925 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6525 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.69 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 3: Patients with acute 
prostatitis + ohdsi:283 | 34.35 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:3500 | 34.1125 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:15498 | 34.1125 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:13720 | 28.9425 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:2054 | 28.9025 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:15499 | 15.1725 | [('topic_primary', 'Acute Prostatitis')] + cipher:18650 | 13.7554 | [('topic_primary', 'Chronic Prostatitis or Chronic Pelvic Pain Syndrome (MVP)')] + ohdsi:1301 | -5.23 | [('topic_mismatch', 'Urinary Tract Infection')] + +CASE 4: Patients who underwent esophagectomy + ohdsi:1097 | 32.1233 | [('topic_primary', 'Esophagectomy'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Esophagectomy"]}')] + ohdsi:1294 | 26.33 | [('topic_primary', 'Esophagectomy')] + ohdsi:870 | 26.31 | [('topic_primary', 'Esophagectomy')] + ohdsi:1309 | 22.35 | [('topic_primary', 'Esophagectomy')] + ohdsi:877 | -5.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + cipher:30087 | -9.3875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 5: Patients diagnosed with peripheral neuritis + ohdsi:388 | 35.85 | [('topic_primary', 'Peripheral neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral neuritis"]}')] + ohdsi:389 | 20.08 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Peripheral Neuropathy"]}')] + ohdsi:238 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + ohdsi:540 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + cipher:30768 | 19.8825 | [('topic_primary', 'Peripheral Neuropathies'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathies"]}')] + ohdsi:236 | 19.6233 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic Peripheral Neuropathy"]}')] + cipher:2808 | 19.1125 | [('topic_primary', 'Peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hereditary and idiopathic peripheral neuropathy"]}')] + ohdsi:541 | 18.31 | [('topic_primary', 'Idiopathic peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic peripheral neuropathy"]}')] + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + ohdsi:760 | 27.7071 | [('topic_primary', 'IL-23 Inhibitors')] + ohdsi:1042 | 27.6271 | [('topic_primary', 'IL-23 inhibitors')] + ohdsi:1040 | 27.6071 | [('topic_primary', 'TNF alpha inhibitors')] + ohdsi:1069 | 25.5 | [('topic_primary', 'TNF inhibitors'), ('topic_context', '{"context_conditions": ["Crohns disease"], "target_conditions": ["TNF inhibitors"]}')] + ohdsi:759 | 23.6671 | [('topic_primary', 'TNF-alpha Inhibitors, IL23 Inhibitors')] + ohdsi:1057 | 20.3614 | [('topic_primary', 'IL-23 inhibitors'), ('topic_context', '{"context_conditions": ["Plaque psoriasis", "Psoriasis vulgaris"], "target_conditions": ["IL23 inhibitors"]}')] + ohdsi:1066 | 19.9881 | [('topic_primary', 'Tumor Necrosis Factor alpha (TNFa) inhibitors')] + ohdsi:1068 | 19.75 | [('topic_primary', 
'Tumor Necrosis Factor alpha (TNFa) inhibitors'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Tumor Necrosis Factor alpha (TNFa) inhibitors"]}')] + +CASE 7: Patients with allergic rhinitis + ohdsi:508 | 35.85 | [('topic_primary', 'Allergic rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic rhinitis"]}')] + ohdsi:367 | 35.83 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:2081 | 35.6125 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:30258 | 30.1725 | [('topic_primary', 'Allergic and Chronic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic and chronic rhinitis diagnosis"]}')] + cipher:13411 | 28.1125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:12 | 23.2067 | [('topic_primary', 'Rhinitis'), ('topic_context', '{"context_conditions": ["Common cold", "Sinusitis", "Respiratory Symptoms"], "target_conditions": ["Rhinitis"]}')] + cipher:15246 | 20.6125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:370 | 16.25 | [('topic_primary', 'Allergic Disorder')] + +CASE 8: Patients with ischemic heart disease + ohdsi:654 | 35.85 | [('topic_primary', 'Ischemic heart disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic heart disease"]}')] + cipher:16261 | 35.6525 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29560 | 35.5375 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29218 | 25.1325 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + 
cipher:30610 | 25.1125 | [('topic_primary', 'Ischaemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischaemic Heart Disease"]}')] + cipher:29772 | 25.1125 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:30617 | 12.6725 | [('topic_primary', 'Coronary Heart Disease')] + cipher:31868 | 11.2792 | [('topic_primary', 'Chronic Ischaemic Heart Disease')] + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + cipher:13824 | 18.1925 | [('topic_primary', 'Early Labor Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Early Labor Hemorrhage"]}')] + cipher:2643 | 16.6411 | [('topic_primary', 'Early or threatened labor; hemorrhage in early pregnancy')] + cipher:2798 | 13.7375 | [('topic_primary', 'Hemorrhage in early pregnancy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hemorrhage in early pregnancy"]}')] + cipher:17376 | 10.4458 | [('topic_primary', 'Hemorrhage')] + cipher:15566 | 9.3625 | [('topic_primary', 'Pregnancy Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pregnancy Hemorrhage"]}')] + cipher:13827 | 4.4458 | [('topic_primary', 'Hemorrhage in Early Pregnancy')] + ohdsi:677 | 4.375 | [('topic_primary', 'Preterm labor with preterm delivery'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Preterm labor with preterm delivery"]}')] + ohdsi:1434 | -10.69 | [('topic_mismatch', 'Pregnancy Loss')] + +CASE 10: Patients who underwent lung resection + ohdsi:1268 | 28.6833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative Afib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:1308 | 24.5833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative 
AFib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:869 | 22.81 | [('topic_primary', 'Lung Resection')] + ohdsi:1293 | 22.79 | [('topic_primary', 'Lung Resection')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:877 | -9.25 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1106 | -9.25 | [('topic_mismatch', 'Surgery')] + +CASE 11: Patients with laryngitis + ohdsi:355 | 35.85 | [('topic_primary', 'Laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Laryngitis"]}')] + cipher:2360 | 30.3625 | [('topic_primary', 'Chronic laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic laryngitis"]}')] + cipher:15233 | 27.9458 | [('topic_primary', 'Acute laryngitis and tracheitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute laryngitis", "Tracheitis"]}')] + cipher:13398 | 27.7375 | [('topic_primary', 'Acute Laryngitis and Tracheitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Laryngitis and Tracheitis"]}')] + cipher:2046 | 22.1125 | [('topic_primary', 'Acute laryngitis and tracheitis')] + ohdsi:327 | -3.69 | [('topic_mismatch', 'Pharyngitis')] + ohdsi:9 | -3.71 | [('topic_mismatch', 'Sore throat')] + cipher:29206 | -3.8075 | [('topic_mismatch', 'Peritonsillar Abscess')] + +CASE 12: Patients with regional enteritis + cipher:3534 | 35.6925 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:775 | -3.75 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 
'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 13: Patients with renal sclerosis + cipher:13646 | 30.4625 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 28.1925 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 20.6325 | [('topic_primary', 'Renal Sclerosis')] + cipher:3541 | 20.6125 | [('topic_primary', 'Renal Sclerosis')] + ohdsi:1003 | 20.0 | [('topic_primary', 'Renal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal cancer"]}')] + ohdsi:481 | 18.25 | [('topic_primary', 'Renal failure syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal failure syndrome"]}')] + ohdsi:467 | 8.75 | [('topic_primary', 'Systemic sclerosis')] + cipher:31257 | 8.6125 | [('topic_primary', 'Renal disease')] + +CASE 14: Patients with cardiomyopathy + cipher:30192 | 34.4625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 34.0058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 30.58 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 30.4025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 29.0292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 20.6325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 20.6125 | [('topic_primary', 'Cardiomyopathy')] + 
cipher:31280 | 20.6125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 15: Patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:229 | -3.75 | [('topic_mismatch', 'Progressive Multifocal Leukoencephalopathy (PML)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + +CASE 16: Patients with anorexia nervosa + ohdsi:1340 | 34.29 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:17187 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:2117 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:12990 | 26.6125 | [('topic_primary', 'Anorexia Nervosa')] + ohdsi:1339 | 18.5 | [('topic_primary', 'Bulimia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Bulimia Nervosa"]}')] + cipher:30163 | 5.9425 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia and Bulimia Nervosa"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1341 | -5.25 | [('topic_mismatch', 'Eating Disorders')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 17: Patients with dizziness, vertigo, or motion sickness + cipher:3402 | 19.8625 | [('topic_primary', 'Vertigo'), ('topic_context', '{"context_conditions": [], "target_conditions": 
["Vertigo"]}')] + cipher:13215 | 18.8125 | [('topic_primary', 'Dizziness and Giddiness (Lightheadedness and Vertigo)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dizziness and Giddiness (Lightheadedness and Vertigo)"]}')] + ohdsi:893 | 16.29 | [('topic_primary', 'Vertigo')] + cipher:2623 | 14.7792 | [('topic_primary', 'Dizziness and giddiness (Light-headedness and vertigo)')] + ohdsi:244 | 12.35 | [('topic_primary', 'Dizziness')] + cipher:15084 | 6.7792 | [('topic_primary', 'Dizziness and giddiness')] + cipher:4387 | -3.8675 | [('topic_mismatch', 'Vestibular Disorders')] + ohdsi:891 | -7.69 | [('topic_mismatch', 'Nausea')] + +CASE 18: Patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 26.6725 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 19.1525 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.25 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + ohdsi:605 | -12.25 | [('topic_mismatch', 'Muscle pain')] + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + cipher:2915 | 21.1125 | [('topic_primary', 'Insulin Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Insulin Adverse Effects"]}')] + cipher:2064 | 18.4125 | [('topic_primary', 'Adrenal Cortical Steroids Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Adrenal Cortical Steroids Adverse Effects"]}')] + cipher:2123 | 18.3125 | [('topic_primary', 'Lipid-Lowering Drug Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Lipid-Lowering Drug Adverse Effects"]}')] + 
cipher:3573 | 16.1125 | [('topic_primary', 'Salicylates adverse effects')] + cipher:2125 | 16.1125 | [('topic_primary', 'Antirheumatics adverse effects')] + cipher:14303 | 14.8392 | [('topic_primary', 'Adrenal Steroid Adverse Effects')] + cipher:17565 | 14.0192 | [('topic_primary', 'Adverse Effects of Adrenal Steroids')] + cipher:3180 | 13.9792 | [('topic_primary', 'Opiate Use and Adverse Effects')] + +CASE 20: Patients with low blood pressure + cipher:13390 | 35.6125 | [('topic_primary', 'Hypotension'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypotension"]}')] + ohdsi:339 | 21.33 | [('topic_primary', 'Hypotension')] + ohdsi:890 | 21.31 | [('topic_primary', 'Hypotension')] + ohdsi:526 | 17.25 | [('topic_primary', 'Orthostatic hypotension')] + ohdsi:997 | 9.35 | [('topic_primary', 'Hypotension')] + ohdsi:954 | -3.75 | [('topic_mismatch', 'Syncope')] + cipher:4093 | -3.8875 | [('topic_mismatch', 'Peripheral Vascular Disease')] + ohdsi:445 | -10.75 | [('topic_mismatch', 'Hypoglycemia')] + +CASE 21: Patients with encephalopathy + ohdsi:194 | 35.83 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + cipher:2664 | 35.6125 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:331 | 33.81 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:223 | 27.2867 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)'), ('topic_context', '{"context_conditions": ["Eclampsia", "Hypertensive encephalopathy"], "target_conditions": ["Posterior reversible encepha... [truncated 19 chars]')] + ohdsi:936 | -0.8929 | [('topic_context', '{"context_conditions": ["Hepatic necrosis", "Hepatic coma", "Hepatic encephalopathy", "Liver failure", "Liver injury"], ... 
[truncated 41 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1333 | -3.71 | [('topic_mismatch', 'Advanced Liver Disease')] + ohdsi:544 | -3.75 | [('topic_mismatch', 'Encephalitis')] + ohdsi:1075 | -3.75 | [('topic_mismatch', 'Narcolepsy')] + +CASE 22: Patients with birdshot chorioretinitis + ohdsi:1223 | 8.0167 | [('topic_context', '{"context_conditions": ["Uveitis"], "target_conditions": ["Birdshot chorioretinitis"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1226 | -3.73 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.75 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:755 | -3.75 | [('topic_mismatch', 'Uveitis')] + cipher:30185 | -3.8875 | [('topic_mismatch', 'Posterior Uveitis')] + cipher:13118 | -3.8875 | [('topic_mismatch', 'Chorioretinal Inflammations Scars')] + cipher:2341 | -3.8875 | [('topic_mismatch', 'Chorioretinal inflammations, scars, and other disorders of choroid')] + ohdsi:1225 | -10.69 | [('topic_mismatch', 'Uveitis')] + +CASE 23: Older adults with macular degeneration + cipher:30295 | 35.6725 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3006 | 35.6325 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3005 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:2505 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:16256 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:14995 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3007 | 32.1125 | [('topic_primary', 'Macular Degeneration, Wet'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration, Wet"]}')] + ohdsi:536 | 30.6 | [('topic_primary', 'Age related macular degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Age related macular degeneration"]}')] + +CASE 24: Patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:901 | 21.75 | [('topic_primary', 'Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anemia"]}')] + cipher:17112 | 17.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + ohdsi:210 | 15.4167 | [('topic_primary', 'Hemolytic Anemia')] + +CASE 25: Patients with MSI-low rectal adenocarcinoma + ohdsi:836 | 27.75 | [('topic_primary', 'colorectal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["colorectal cancer"]}')] + ohdsi:823 | 27.75 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:819 | 22.0833 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": ["MSI-L", "MSI-indeterminate", "MSS", "pMMR"], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:831 | 21.0833 | [('topic_primary', 'Colorectal Cancer Treatment'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:843 | 20.35 | [('topic_primary', 'colorectal cancer')] + ohdsi:821 | 20.29 | [('topic_primary', 'Colorectal Cancer')] + ohdsi:812 | 20.27 | [('topic_primary', 'Colorectal Cancer')] + ohdsi:840 | 13.5833 | [('topic_primary', 'colorectal cancer treatment')] + +CASE 26: Patients with blistering skin lesions + ohdsi:652 | 13.875 | [('topic_primary', 'Vasculitis of the skin'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vasculitis of the skin"]}')] + ohdsi:376 | 6.6433 | [('topic_primary', 'Bleeding Skin')] + ohdsi:1168 | 5.54 | [('topic_primary', 'Skin Ulcer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Skin Ulcer"]}')] + ohdsi:414 | 5.25 | [('topic_primary', 'Skin Eruption Symptoms')] + ohdsi:948 | -3.73 | [('topic_mismatch', 'Rash')] + ohdsi:461 | -3.75 | [('topic_mismatch', 'Erythema multiforme')] + ohdsi:462 | -3.75 | [('topic_mismatch', 'Lichen planus')] + cipher:4016 | -3.8875 | [('topic_mismatch', 'Desquamative Rash')] + +CASE 27: Patients with stomatitis or mucositis + cipher:3657 | 27.7575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + 
cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 28: Patients with neurofibromatosis type 1 + ohdsi:697 | 35.85 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:304 | 35.83 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:305 | 35.77 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:696 | 20.81 | [('topic_primary', 'Neurofibromatosis type 2')] + cipher:12649 | 20.1125 | [('topic_primary', 'Neurofibromatosis')] + ohdsi:698 | 20.04 | [('topic_primary', 'Neurofibromatosis syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis syndrome"]}')] + ohdsi:308 | 15.6667 | [('topic_primary', 'Neurofibromatosis'), ('topic_context', '{"context_conditions": ["MRI of Brain", "Ophthalmology Visits"], "target_conditions": ["Neurofibromatosis"]}')] + ohdsi:306 | 13.85 | [('topic_primary', 'Optic Pathway Glioma and Neurofibromatosis')] + +CASE 29: Patients with keloid scars + cipher:13930 | 16.1125 | [('topic_primary', 'Keloid Scar')] + cipher:15610 | 8.7125 | [('topic_primary', 'Keloid Scar')] + cipher:2950 | 8.6525 | [('topic_primary', 'Keloid Scar')] + cipher:18443 | -10.8875 | [('topic_mismatch', 'Severe Cutaneous Adverse Reaction (SCAR)')] + cipher:30650 | -11.3875 | [('topic_mismatch', 'Smoking Status')] + ohdsi:1168 | -14.69 | [('topic_mismatch', 'Skin Ulcer')] + ohdsi:1215 | -14.73 | [('topic_mismatch', 'Cancer')] + ohdsi:1102 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 30: Patients with acetaminophen exposure + 
ohdsi:1187 | 32.85 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1427 | 28.5 | [('topic_primary', 'Acamprosate Exposure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acamprosate Exposure"]}')] + ohdsi:1428 | 24.75 | [('topic_primary', 'Disulfiram Exposure')] + ohdsi:1158 | 20.83 | [('topic_primary', 'Aspirin Exposure')] + cipher:31254 | -15.3875 | [('topic_mismatch', 'Liver Disease')] + cipher:30616 | -15.3875 | [('topic_mismatch', 'Substance Misuse')] + ohdsi:1423 | -17.73 | [('topic_mismatch', 'Acute Intoxication')] + ohdsi:735 | -17.75 | [('topic_mismatch', 'Acute Liver Injury')] + +CASE 31: Patients exposed to rifamycin antibiotics + ohdsi:1206 | 16.81 | [('topic_primary', 'Macrolide Drug Exposure')] + ohdsi:1211 | 0.85 | [('topic_mismatch', 'Rifamycins')] + ohdsi:1213 | 0.83 | [('topic_mismatch', 'Streptogramins')] + ohdsi:1202 | 0.77 | [('topic_mismatch', 'Carbapenems')] + ohdsi:1212 | 0.75 | [('topic_mismatch', 'Sulfonamides')] + ohdsi:1209 | 0.75 | [('topic_mismatch', 'Penicillins')] + ohdsi:1208 | 0.75 | [('topic_mismatch', 'Oxazolidinones')] + ohdsi:1207 | 0.75 | [('topic_mismatch', 'Antibiotics - Monobactams')] + +CASE 32: Patients with a joint or ligament sprain + cipher:14236 | 28.6925 | [('topic_primary', 'Joint Ligament Sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Joint Ligament Sprain"]}')] + cipher:3569 | 27.7375 | [('topic_primary', 'Rotator cuff (capsule) sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Rotator cuff (capsule) sprain"]}')] + cipher:2944 | 16.7125 | [('topic_primary', 'Ligament sprain')] + cipher:15814 | 15.3392 | [('topic_primary', 'Joint Ligament Sprain')] + cipher:3072 | 15.2792 | [('topic_primary', 'Muscle-tendon sprain')] + cipher:15813 | 14.6125 | [('topic_primary', 'Rotator cuff (capsule) sprain')] + ohdsi:363 | -3.73 | [('topic_mismatch', 'Joint stiffness')] + cipher:29559 | -7.8475 | [('topic_mismatch', 'Musculoskeletal Pain 
and Injury')] + +CASE 33: Pregnant patients with miscarriage or stillbirth + ohdsi:627 | 31.83 | [('topic_primary', 'Miscarriage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage"]}')] + cipher:3056 | 26.4025 | [('topic_primary', 'Miscarriage; Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:15565 | 26.3625 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:13818 | 26.3625 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + ohdsi:1434 | -7.65 | [('topic_mismatch', 'Pregnancy Loss')] + ohdsi:1432 | -7.69 | [('topic_mismatch', 'Stillbirth')] + ohdsi:1431 | -7.73 | [('topic_mismatch', 'Ectopic Pregnancy')] + ohdsi:606 | -7.75 | [('topic_mismatch', 'Stillbirth')] + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + cipher:13354 | 19.2775 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:2142 | 19.2375 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:31817 | 15.6125 | [('topic_primary', 'Embolism or Thrombosis')] + cipher:15204 | 13.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:31293 | 13.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:31819 | 12.6925 | [('topic_primary', 'Arterial Embolism, Upper Extremity'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism"]}')] + ohdsi:1090 | 6.25 | [('topic_primary', 'Pulmonary Embolism'), ('topic_context', '{"context_conditions": [], "target_conditions": 
["Pulmonary Embolism"]}')] + cipher:31820 | -12.7875 | [('topic_mismatch', 'Lower Extremity Vascular Disease')] + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + cipher:31223 | 35.6125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + cipher:30639 | 35.6125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1301 | 32.1389 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Cystitis", "Asymptomatic bacteriuria"], "target_conditions": ["Acute Urinary tract infections U... [truncated 12 chars]')] + ohdsi:1186 | 31.81 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:410 | 28.25 | [('topic_primary', 'Urinary Tract Infection')] + ohdsi:861 | 22.5 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Pyuria", "Bacteriuria", "Cystitis"], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1061 | -7.65 | [('topic_mismatch', 'Cephalosporin Exposure')] + ohdsi:1060 | -7.67 | [('topic_mismatch', 'Fluoroquinolone Use')] + +CASE 36: Patients hospitalized with preinfarction syndrome + ohdsi:939 | -1.4357 | [('topic_context', '{"context_conditions": ["Preinfarction Syndrome", "Emergency Room Visit", "Inpatient Visit"], "target_conditions": ["Hos... 
[truncated 15 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1081 | -1.73 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30728 | -1.8875 | [('topic_mismatch', 'Unstable Angina')] + cipher:29772 | -5.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:30101 | -5.3875 | [('topic_mismatch', 'Myocardial Infarction')] + ohdsi:263 | -8.75 | [('topic_mismatch', 'Unstable Angina and NSTEMI')] + ohdsi:262 | -8.75 | [('topic_mismatch', 'Unstable Angina')] + cipher:30617 | -12.8275 | [('topic_mismatch', 'Coronary Heart Disease')] + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + cipher:3412 | 5.0925 | [('topic_primary', 'Personal history of diseases of blood and blood-forming organs')] + cipher:17130 | 5.0125 | [('topic_primary', 'Personal History of Blood and Bloodforming Organ Diseases')] + cipher:18428 | -3.8675 | [('topic_mismatch', 'Pancytopenia')] + cipher:30246 | -3.8875 | [('topic_mismatch', 'Aplastic Anaemias')] + cipher:30138 | -3.8875 | [('topic_mismatch', 'Hyposplenism')] + cipher:30287 | -3.8875 | [('topic_mismatch', 'Myelodysplastic Syndromes')] + cipher:29220 | -3.8875 | [('topic_mismatch', 'Anemias, Other')] + cipher:30672 | -3.8875 | [('topic_mismatch', 'Thalassaemia Trait')] + +CASE 38: Patients with benign pancreatic conditions + cipher:16955 | 20.7125 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16954 | 20.6725 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16947 | 20.6325 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16952 | 19.2775 | [('topic_primary', 'Pancreatic Cancer'), ('topic_context', '{"context_conditions": ["PSC", "IBD"], "target_conditions": ["Pancreatic Cancer"]}')] + cipher:16953 | 8.6125 | [('topic_primary', 'Pancreatic Inflammation')] + ohdsi:496 | -3.75 | [('topic_mismatch', 'Abdominal Pain')] + cipher:30223 | -3.8875 | [('topic_mismatch', 'Benign Neoplasm of Stomach and Duodenum')] + cipher:30238 | -3.8875 | 
[('topic_mismatch', 'Pancreatitis')] + +CASE 39: Patients with primary localized osteoarthritis + cipher:3192 | 34.1725 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 40: New users of dihydropyridine calcium channel blockers + ohdsi:1047 | 41.705 | [('topic_primary', 'dihydropyridine calcium channel blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["dihydropyridine calcium channel blockers"]}')] + ohdsi:1048 | 34.85 | [('topic_primary', 'dihydropyridine calcium channel blockers')] + ohdsi:1036 | 25.06 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1049 | 24.395 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Hypertension", "Essential Hypertension"], 
"target_conditions": ["Beta Blockers"]}')] + ohdsi:1052 | 24.29 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Acute Myocardial Infarction"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1046 | 4.75 | [('topic_mismatch', 'Thiazide diuretics')] + ohdsi:1035 | 2.75 | [('topic_mismatch', 'Thiazide diuretics')] + cipher:30608 | -11.8875 | [('topic_mismatch', 'Cardiovascular Risk Score')] + +CASE 41: Veteran patients with renal sclerosis + cipher:13646 | 30.3625 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 28.1725 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 21.7125 | [('topic_primary', 'Renal Sclerosis')] + cipher:15442 | 17.6125 | [('topic_primary', 'Nephritis, Nephrosis, Renal Sclerosis')] + cipher:18902 | 9.6125 | [('topic_primary', 'Renal Failure')] + cipher:31257 | 8.6125 | [('topic_primary', 'Renal disease')] + cipher:16003 | -3.8875 | [('topic_mismatch', 'Chronic Kidney Disease')] + ohdsi:964 | -11.17 | [('topic_mismatch', 'Chronic Kidney Disease')] + +CASE 42: Veteran patients with polymyalgia rheumatica + cipher:30277 | 35.7125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 28.1525 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 21.6925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 20.6725 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -3.75 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -3.75 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30630 | -11.3875 | [('topic_mismatch', 'Rheumatoid Arthritis')] + +CASE 43: Veteran patients with autoimmune hemolytic anemia + cipher:18441 | 35.6725 | [('topic_primary', 'Autoimmune hemolytic 
anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.83 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:17112 | 18.6325 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + cipher:2178 | 14.7792 | [('topic_primary', 'Autoimmune hemolytic anemias (MAP)')] + cipher:3120 | 11.2792 | [('topic_primary', 'Non-autoimmune hemolytic anemias')] + +CASE 44: Veteran patients with cardiac complications + cipher:17258 | 22.1125 | [('topic_primary', 'Cardiac Complications')] + cipher:15819 | 18.1125 | [('topic_primary', 'Cardiac/Vascular Device Complications')] + ohdsi:1081 | -3.67 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30192 | -3.8875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:29218 | -3.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16294 | -9.8875 | [('topic_mismatch', 'Cardiovascular Disease Mortality')] + cipher:16278 | -11.3475 | [('topic_mismatch', 'VA Administrative Data')] + cipher:30617 | -11.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + +CASE 45: Patients diagnosed with fasciitis + cipher:15684 | 35.6925 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6525 | 
[('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.69 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 46: Patients with stomatitis or mucositis + cipher:3657 | 27.7575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 47: Patients with Barretts esophagus + cipher:13531 | 35.6925 | [('topic_primary', "Barrett's Esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Esophagus"]}')] + cipher:2187 | 35.6725 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:15342 | 35.6125 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:30228 | 19.9625 | [('topic_primary', "Barrett's Oesophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Oesophagus"]}')] + ohdsi:447 | -3.73 
| [('topic_mismatch', 'Esophagitis')] + ohdsi:525 | -3.75 | [('topic_mismatch', 'Gastroesophageal Reflux Disease')] + ohdsi:446 | -3.75 | [('topic_mismatch', 'Eosinophilic esophagitis')] + cipher:30087 | -3.8875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 48: Patients with regional enteritis + cipher:3534 | 35.6925 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:775 | -3.75 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 49: Patients with primary localized osteoarthritis + cipher:3192 | 34.1725 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip 
Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 50: Patients with aortic valve disease + cipher:13226 | 28.1125 | [('topic_primary', 'Aortic Valve Disease')] + cipher:13231 | 25.1125 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aortic Valve Disorders"]}')] + cipher:30301 | 24.6958 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nonrheumatic aortic valve disorders"]}')] + cipher:17250 | 20.6125 | [('topic_primary', 'Aortic Valve Disease')] + cipher:2131 | 20.6125 | [('topic_primary', 'Aortic valve disease')] + ohdsi:1172 | 0.79 | [('topic_primary', 'Heart valve disorder')] + ohdsi:1103 | -0.9833 | [('topic_primary', 'Cardiac Valve Surgery'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Cardiac Valve Surgery"]}')] + ohdsi:876 | -4.17 | [('topic_primary', 'Cardiac Valve Surgery')] + +CASE 51: Patients with chronic periodontitis + cipher:2371 | 28.1725 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13494 | 28.1125 | [('topic_primary', 'Chronic Periodontitis')] + cipher:3397 | 25.2125 | [('topic_primary', 'Periodontitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Periodontitis"]}')] + cipher:15317 | 20.6325 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13492 | 20.1925 | [('topic_primary', 'Periodontitis')] + cipher:13493 | 16.1125 | [('topic_primary', 'Acute Periodontitis')] + cipher:15315 | 12.6125 | [('topic_primary', 'Periodontitis')] + cipher:29206 | -3.8875 | [('topic_mismatch', 'Peritonsillar Abscess')] + +CASE 52: Patients with hypertensive chronic kidney disease + ohdsi:923 | 30.5 | [('topic_primary', 'Chronic 
Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + cipher:2846 | 28.1125 | [('topic_primary', 'Hypertensive Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertensive Chronic Kidney Disease"]}')] + cipher:13240 | 20.6125 | [('topic_primary', 'Hypertensive Chronic Kidney Disease')] + ohdsi:1191 | 19.08 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + ohdsi:964 | 16.85 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31686 | 16.6525 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31287 | 14.1325 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31697 | 7.8725 | [('topic_primary', 'Hypertensive Heart and Renal Disease')] + +CASE 53: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + cipher:31280 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 54: Patients with scleritis or 
episcleritis + cipher:30069 | 27.2125 | [('topic_primary', 'Scleritis and Episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and Episcleritis"]}')] + cipher:3581 | 27.1525 | [('topic_primary', 'Scleritis and episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and episcleritis"]}')] + cipher:13186 | 21.2792 | [('topic_primary', 'Scleritis and Episcleritis')] + cipher:15063 | 14.2792 | [('topic_primary', 'Scleritis and Episcleritis')] + ohdsi:1226 | -5.23 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -5.25 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:1223 | -5.25 | [('topic_mismatch', 'Uveitis')] + ohdsi:620 | -5.25 | [('topic_mismatch', 'Uveitis')] + diff --git a/docs/evaluation/phenotype_recommendations/testing-weighted_0.7_0.3.txt b/docs/evaluation/phenotype_recommendations/testing-weighted_0.7_0.3.txt new file mode 100644 index 0000000..08e3571 --- /dev/null +++ b/docs/evaluation/phenotype_recommendations/testing-weighted_0.7_0.3.txt @@ -0,0 +1,2324 @@ +INFO: creating output file /tmp/phenotype_recommendation_tests.json +INFO: Cardiac defibrillator in situ (MAP) +INFO: Fasciitis (gwPheWAS) +INFO: Acute prostatitis (MAP) +INFO: [P] Esophagectomy +INFO: [P][R] Peripheral neuritis +INFO: [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap +INFO: [P][R] Allergic rhinitis +INFO: Ischemic Heart Disease (Sandhu) +INFO: Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) +INFO: [P] Lung Resection +INFO: [P] Laryngitis +INFO: Regional Enteritis (Phecode) +INFO: Renal Sclerosis NOS (VADC) +INFO: Other cardiomyopathy (MAP) +INFO: [P] Posterior reversible encephalopathy syndrome PRES +INFO: [P] Anorexia Nervosa +INFO: [P] Dizziness or giddiness including motion sickness and vertigo +INFO: Polymyalgia Rheumatica (VADC) +INFO: Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) +INFO: [P][R] Low blood pressure 
+INFO: [P] Encephalopathy +INFO: [P] Birdshot chorioretinitis +INFO: Macular Degeneration (Senile) of Retina Nos (Phecode) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: [P] Primary adenocarcinoma of rectum MSI-L +INFO: Blister (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Neurofibromatosis type 1 (FP) +INFO: Keloid scar (gwPheWAS) +INFO: [P] acetaminophen exposure 10 +INFO: [P] Antibiotics Rifamycins 10 +INFO: Joint/ligament sprain (gwPheWAS) +INFO: Miscarriage; stillbirth (MAP) +INFO: Arterial embolism and thrombosis of lower extremity artery (MAP) +INFO: [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection +INFO: [P] Hospitalization with preinfarction syndrome +INFO: Personal history of diseases of blood and blood-forming organs (MAP) +INFO: Other Benign Pancreatic Conditions (Nguyen) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: [P] New users of dihydropyridine calcium channel blockers +INFO: Renal Sclerosis NOS (VADC) +INFO: Polymyalgia Rheumatica (VADC) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: Cardiac Complications Not Elsewhere Classified (VADC) +INFO: Fasciitis (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Barrett's esophagus (gwPheWAS) +INFO: Regional Enteritis (Phecode) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: Aortic Valve Disease (Phecode) +INFO: Chronic Periodontitis (Phecode) +INFO: Hypertensive chronic kidney disease (MAP) +INFO: Other cardiomyopathy (MAP) +INFO: Scleritis and episcleritis (MAP) +INFO: Other disorders of carbohydrate transport and metabolism (MAP) +INFO: [P] acetaminophen exposure 10 +INFO: Dyschromia and Vitiligo +INFO: Acute Hepatic Injury with no pre-existing liver disease +INFO: Nerve Plexus Lesions +INFO: Posterior reversible encephalopathy syndrome PRES +INFO: Ulcerative colitis (chronic) +INFO: Pervasive Developmental Disorders +INFO: Acute myocardial infarction +INFO: Antiphospholipid syndrome +INFO: dementia in older adults +INFO: GI 
bleeding adverse event outcome +INFO: running COVID outpatient diagnosis cohort +INFO: running abdominal aortic aneurysm in veterans +INFO: COPD phenotype using diagnosis codes +INFO: heart failure hospitalization cohort +INFO: diabetes medication-based phenotype +INFO: Tests completed. File written. +RESULTS SUMMARY: +count 71 +CASE 1: Patients with an implanted cardiac defibrillator + shortlist: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + rec_ids: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + final_deterministic: {'selected_ids': ['cipher:15146', 'cipher:2288', 'cipher:13288'], 'matched_llm_ids': ['cipher:15146', 'cipher:2288', 'cipher:13288'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 2: Patients diagnosed with fasciitis + shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + rec_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + final_deterministic: {'selected_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'matched_llm_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 3: Patients with acute prostatitis + shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + rec_ids: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + final_deterministic: {'selected_ids': ['ohdsi:283', 'cipher:13720', 'cipher:3500'], 'matched_llm_ids': ['ohdsi:283', 'cipher:13720', 'cipher:3500'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 4: Patients who underwent esophagectomy + shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + rec_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + final_deterministic: {'selected_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'], 'matched_llm_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 5: Patients diagnosed with peripheral neuritis + shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + rec_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: ['ohdsi:540'] + dedupe_backfilled_ids: [] + dedupe_applied: True + enforced_shortlist_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + final_deterministic: {'selected_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'matched_llm_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + shortlist: ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'] + rec_ids: ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'] + 
replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'] + final_deterministic: {'selected_ids': ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'], 'matched_llm_ids': ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 7: Patients with allergic rhinitis + shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + rec_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + final_deterministic: {'selected_ids': ['ohdsi:508', 'ohdsi:367', 'cipher:2081'], 'matched_llm_ids': ['ohdsi:508', 'ohdsi:367', 'cipher:2081'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 8: Patients with ischemic heart disease + shortlist: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + rec_ids: ['ohdsi:654', 'cipher:16261', 'cipher:29560'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + final_deterministic: {'selected_ids': ['ohdsi:654', 'cipher:16261', 'cipher:29560'], 'matched_llm_ids': ['ohdsi:654', 'cipher:16261', 'cipher:29560'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + 
shortlist: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + rec_ids: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + final_deterministic: {'selected_ids': ['cipher:2643', 'cipher:13824', 'cipher:2798'], 'matched_llm_ids': ['cipher:2643', 'cipher:13824'], 'defaulted_ids': ['cipher:2798'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 10: Patients who underwent lung resection + shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + rec_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + final_deterministic: {'selected_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'matched_llm_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 11: Patients with laryngitis + shortlist: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + rec_ids: ['ohdsi:355', 'cipher:15233', 'cipher:2046'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + final_deterministic: {'selected_ids': ['ohdsi:355', 'cipher:15233', 'cipher:2046'], 'matched_llm_ids': ['ohdsi:355', 'cipher:15233'], 'defaulted_ids': ['cipher:2046'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} 
+ +CASE 12: Patients with regional enteritis + shortlist: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + rec_ids: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + final_deterministic: {'selected_ids': ['cipher:13571', 'cipher:15376', 'cipher:3534'], 'matched_llm_ids': ['cipher:13571', 'cipher:15376', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 13: Patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + rec_ids: ['cipher:13646', 'cipher:13656', 'cipher:3541'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'cipher:3541'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'cipher:3541'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 14: Patients with cardiomyopathy + shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + rec_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + final_deterministic: {'selected_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'matched_llm_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'defaulted_ids': [], 
'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 15: Patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 16: Patients with anorexia nervosa + shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + rec_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + final_deterministic: {'selected_ids': ['ohdsi:1340', 'cipher:17187', 'cipher:2117'], 'matched_llm_ids': ['ohdsi:1340'], 'defaulted_ids': ['cipher:17187', 'cipher:2117'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 17: Patients with dizziness, vertigo, or motion sickness + shortlist: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + rec_ids: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + final_deterministic: {'selected_ids': ['cipher:13215', 'ohdsi:893', 'cipher:3402'], 'matched_llm_ids': ['cipher:13215', 'ohdsi:893', 'cipher:3402'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 
'used_default_justification_count': 0} + +CASE 18: Patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + rec_ids: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:13992', 'cipher:3460'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992', 'cipher:3460'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + shortlist: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + rec_ids: ['cipher:2064', 'cipher:2123', 'cipher:3573'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + final_deterministic: {'selected_ids': ['cipher:2064', 'cipher:2123', 'cipher:3573'], 'matched_llm_ids': ['cipher:2064', 'cipher:3573'], 'defaulted_ids': ['cipher:2123'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 20: Patients with low blood pressure + shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + rec_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + replaced_ids: [] + blocked_pool_ids: ['ohdsi:997'] + blocked_candidate_reasons: {'ohdsi:997': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + final_deterministic: {'selected_ids': ['ohdsi:339', 'ohdsi:890', 'cipher:13390'], 'matched_llm_ids': 
['ohdsi:339', 'ohdsi:890'], 'defaulted_ids': ['cipher:13390'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 21: Patients with encephalopathy + shortlist: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223', 'cipher:2664'] + rec_ids: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223', 'cipher:2664'] + final_deterministic: {'selected_ids': ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'], 'matched_llm_ids': ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 22: Patients with birdshot chorioretinitis + shortlist: ['ohdsi:1223'] + rec_ids: ['ohdsi:1223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1223'] + final_deterministic: {'selected_ids': ['ohdsi:1223'], 'matched_llm_ids': ['ohdsi:1223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 23: Older adults with macular degeneration + shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + rec_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + final_deterministic: {'selected_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'matched_llm_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'defaulted_ids': [], 
'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 24: Patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 25: Patients with MSI-low rectal adenocarcinoma + shortlist: [] + rec_ids: [] + replaced_ids: ['ohdsi:831', 'ohdsi:823', 'ohdsi:819'] + blocked_pool_ids: ['ohdsi:836', 'ohdsi:823', 'ohdsi:819', 'ohdsi:831', 'ohdsi:843'] + blocked_candidate_reasons: {'ohdsi:836': 'procedure_for_diagnosis_intent', 'ohdsi:823': 'procedure_for_diagnosis_intent', 'ohdsi:819': 'procedure_for_diagnosis_intent', 'ohdsi:831': 'procedure_for_diagnosis_intent', 'ohdsi:843': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: [] + final_deterministic: {'selected_ids': [], 'matched_llm_ids': [], 'defaulted_ids': [], 'invalid_llm_ids': ['ohdsi:200839', 'ohdsi:4159', 'ohdsi:82'], 'duplicate_llm_ids': [], 'used_llm_justification_count': 0, 'used_default_justification_count': 0} + +CASE 26: Patients with blistering skin lesions + shortlist: ['ohdsi:652', 'ohdsi:376'] + rec_ids: ['ohdsi:652', 'ohdsi:376'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:652', 'ohdsi:376'] + 
final_deterministic: {'selected_ids': ['ohdsi:652', 'ohdsi:376'], 'matched_llm_ids': ['ohdsi:652', 'ohdsi:376'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 27: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 28: Patients with neurofibromatosis type 1 + shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + rec_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + final_deterministic: {'selected_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'matched_llm_ids': ['ohdsi:697', 'ohdsi:304'], 'defaulted_ids': ['ohdsi:305'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 29: Patients with keloid scars + shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + rec_ids: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13930', 
'cipher:15610', 'cipher:2950'] + final_deterministic: {'selected_ids': ['cipher:13930', 'cipher:15610', 'cipher:2950'], 'matched_llm_ids': ['cipher:13930', 'cipher:15610', 'cipher:2950'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 30: Patients with acetaminophen exposure + shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + rec_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + final_deterministic: {'selected_ids': ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': ['ohdsi:1427', 'ohdsi:1428'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 31: Patients exposed to rifamycin antibiotics + shortlist: ['ohdsi:1206'] + rec_ids: ['ohdsi:1206'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1206'] + final_deterministic: {'selected_ids': ['ohdsi:1206'], 'matched_llm_ids': ['ohdsi:1206'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 32: Patients with a joint or ligament sprain + shortlist: ['cipher:14236', 'cipher:15814', 'ohdsi:363', 'cipher:2944'] + rec_ids: ['cipher:14236', 'cipher:15814', 'ohdsi:363'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14236', 'cipher:15814', 'ohdsi:363', 'cipher:2944'] + 
final_deterministic: {'selected_ids': ['cipher:14236', 'cipher:15814', 'ohdsi:363'], 'matched_llm_ids': ['cipher:14236', 'cipher:15814', 'ohdsi:363'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 33: Pregnant patients with miscarriage or stillbirth + shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + rec_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + final_deterministic: {'selected_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'matched_llm_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + shortlist: ['cipher:13354', 'cipher:31819'] + rec_ids: ['cipher:13354', 'cipher:31819'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13354', 'cipher:31819'] + final_deterministic: {'selected_ids': ['cipher:13354', 'cipher:31819'], 'matched_llm_ids': ['cipher:13354'], 'defaulted_ids': ['cipher:31819'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + shortlist: ['ohdsi:1301', 'ohdsi:410', 'cipher:31223'] + rec_ids: ['ohdsi:1301', 'ohdsi:410', 'cipher:31223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + 
enforced_shortlist_ids: ['ohdsi:1301', 'ohdsi:410', 'cipher:31223'] + final_deterministic: {'selected_ids': ['ohdsi:1301', 'ohdsi:410', 'cipher:31223'], 'matched_llm_ids': ['ohdsi:1301', 'ohdsi:410'], 'defaulted_ids': ['cipher:31223'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 36: Patients hospitalized with preinfarction syndrome + shortlist: ['ohdsi:1081'] + rec_ids: ['ohdsi:1081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1081'] + final_deterministic: {'selected_ids': ['ohdsi:1081'], 'matched_llm_ids': ['ohdsi:1081'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + shortlist: ['cipher:3412', 'cipher:17130'] + rec_ids: ['cipher:3412', 'cipher:17130'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3412', 'cipher:17130'] + final_deterministic: {'selected_ids': ['cipher:3412', 'cipher:17130'], 'matched_llm_ids': ['cipher:3412', 'cipher:17130'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 38: Patients with benign pancreatic conditions + shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + rec_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + 
final_deterministic: {'selected_ids': ['cipher:16954', 'cipher:16952', 'cipher:16953'], 'matched_llm_ids': ['cipher:16952', 'cipher:16953'], 'defaulted_ids': ['cipher:16954'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 39: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 40: New users of dihydropyridine calcium channel blockers + shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + rec_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + final_deterministic: {'selected_ids': ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'], 'matched_llm_ids': ['ohdsi:1047', 'ohdsi:1048'], 'defaulted_ids': ['ohdsi:1052'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 41: Veteran patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + rec_ids: ['cipher:13646', 'cipher:13656', 'cipher:17322'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + 
duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'cipher:17322'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'cipher:17322'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 42: Veteran patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + rec_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 43: Veteran patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 44: Veteran patients with cardiac complications + shortlist: ['cipher:15819', 'cipher:17258'] + rec_ids: ['cipher:15819', 
'cipher:17258'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15819', 'cipher:17258'] + final_deterministic: {'selected_ids': ['cipher:15819', 'cipher:17258'], 'matched_llm_ids': ['cipher:15819', 'cipher:17258'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 45: Patients diagnosed with fasciitis + shortlist: ['cipher:15684', 'cipher:14029', 'cipher:2703'] + rec_ids: ['cipher:15684', 'cipher:14029', 'cipher:2703'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15684', 'cipher:14029', 'cipher:2703'] + final_deterministic: {'selected_ids': ['cipher:15684', 'cipher:14029', 'cipher:2703'], 'matched_llm_ids': ['cipher:15684', 'cipher:14029', 'cipher:2703'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 46: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 47: Patients with Barretts esophagus + shortlist: ['cipher:13531', 'cipher:2187', 'cipher:15342'] + rec_ids: 
['cipher:13531', 'cipher:2187', 'cipher:15342'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13531', 'cipher:2187', 'cipher:15342'] + final_deterministic: {'selected_ids': ['cipher:13531', 'cipher:2187', 'cipher:15342'], 'matched_llm_ids': ['cipher:13531', 'cipher:2187', 'cipher:15342'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 48: Patients with regional enteritis + shortlist: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + rec_ids: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + final_deterministic: {'selected_ids': ['cipher:13571', 'cipher:15376', 'cipher:3534'], 'matched_llm_ids': ['cipher:13571', 'cipher:15376', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 49: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 
50: Patients with aortic valve disease + shortlist: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + rec_ids: ['cipher:13226', 'cipher:13231', 'cipher:30301'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + final_deterministic: {'selected_ids': ['cipher:13226', 'cipher:13231', 'cipher:30301'], 'matched_llm_ids': ['cipher:13226', 'cipher:13231', 'cipher:30301'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 51: Patients with chronic periodontitis + shortlist: ['cipher:13492', 'cipher:2371', 'cipher:13494', 'cipher:3397'] + rec_ids: ['cipher:13492', 'cipher:2371', 'cipher:13494'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13492', 'cipher:2371', 'cipher:13494', 'cipher:3397'] + final_deterministic: {'selected_ids': ['cipher:13492', 'cipher:2371', 'cipher:13494'], 'matched_llm_ids': ['cipher:13492', 'cipher:13494'], 'defaulted_ids': ['cipher:2371'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 52: Patients with hypertensive chronic kidney disease + shortlist: ['ohdsi:923', 'ohdsi:1191', 'ohdsi:964', 'cipher:2846'] + rec_ids: ['ohdsi:923', 'ohdsi:1191', 'ohdsi:964'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:923', 'ohdsi:1191', 'ohdsi:964', 'cipher:2846'] + final_deterministic: {'selected_ids': ['ohdsi:923', 'ohdsi:1191', 'ohdsi:964'], 'matched_llm_ids': ['ohdsi:923', 'ohdsi:1191', 
'ohdsi:964'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 53: Patients with cardiomyopathy + shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + rec_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + final_deterministic: {'selected_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30192'], 'matched_llm_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30192'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 54: Patients with scleritis or episcleritis + shortlist: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + rec_ids: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + final_deterministic: {'selected_ids': ['cipher:30069', 'cipher:3581', 'cipher:13186'], 'matched_llm_ids': ['cipher:30069', 'cipher:13186'], 'defaulted_ids': ['cipher:3581'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + rec_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + 
final_deterministic: {'selected_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'matched_llm_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + shortlist: ['ohdsi:1187'] + rec_ids: ['ohdsi:1187'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187'] + final_deterministic: {'selected_ids': ['ohdsi:1187'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 57: Patients diagnosed with dyschromia and vitiligo + shortlist: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + rec_ids: ['cipher:13900', 'cipher:2628', 'ohdsi:471'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + final_deterministic: {'selected_ids': ['cipher:13900', 'cipher:2628', 'ohdsi:471'], 'matched_llm_ids': ['cipher:13900', 'ohdsi:471'], 'defaulted_ids': ['cipher:2628'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + shortlist: ['ohdsi:716', 'ohdsi:735', 'ohdsi:294', 'ohdsi:293'] + rec_ids: ['ohdsi:716', 'ohdsi:735', 'ohdsi:294'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: 
['ohdsi:716', 'ohdsi:735', 'ohdsi:294', 'ohdsi:293'] + final_deterministic: {'selected_ids': ['ohdsi:716', 'ohdsi:735', 'ohdsi:294'], 'matched_llm_ids': ['ohdsi:716', 'ohdsi:735', 'ohdsi:294'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + shortlist: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + rec_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + final_deterministic: {'selected_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 'matched_llm_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 60: patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 61: patients with chronic ulcerative colitis + shortlist: ['ohdsi:860', 'ohdsi:458', 'cipher:30724'] + rec_ids: ['ohdsi:860', 'ohdsi:458', 'cipher:30724'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:860', 'ohdsi:458', 
'cipher:30724'] + final_deterministic: {'selected_ids': ['ohdsi:860', 'ohdsi:458', 'cipher:30724'], 'matched_llm_ids': ['ohdsi:860', 'ohdsi:458'], 'defaulted_ids': ['cipher:30724'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 62: Veteran patients with developmental disorders that are pervasive + shortlist: ['cipher:12996', 'cipher:17197', 'cipher:17193', 'cipher:3415'] + rec_ids: ['cipher:12996', 'cipher:17197', 'cipher:17193'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12996', 'cipher:17197', 'cipher:17193', 'cipher:3415'] + final_deterministic: {'selected_ids': ['cipher:12996', 'cipher:17197', 'cipher:17193'], 'matched_llm_ids': ['cipher:12996', 'cipher:17193'], 'defaulted_ids': ['cipher:17197'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + rec_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + final_deterministic: {'selected_ids': ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'], 'matched_llm_ids': ['ohdsi:510', 'ohdsi:1081'], 'defaulted_ids': ['cipher:18982'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + shortlist: ['ohdsi:632', 'ohdsi:781'] + rec_ids: ['ohdsi:632', 'ohdsi:781'] + replaced_ids: [] + blocked_pool_ids: 
[] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:632', 'ohdsi:781'] + final_deterministic: {'selected_ids': ['ohdsi:632', 'ohdsi:781'], 'matched_llm_ids': ['ohdsi:632', 'ohdsi:781'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + shortlist: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + rec_ids: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + final_deterministic: {'selected_ids': ['ohdsi:864', 'ohdsi:651', 'cipher:31120'], 'matched_llm_ids': ['ohdsi:864', 'ohdsi:651', 'cipher:31120'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 66: patients who experienced a GI bleed adverse event + shortlist: ['ohdsi:482'] + rec_ids: ['ohdsi:482'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:482'] + final_deterministic: {'selected_ids': ['ohdsi:482'], 'matched_llm_ids': ['ohdsi:482'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + shortlist: ['ohdsi:678', 'cipher:31308'] + rec_ids: ['ohdsi:678', 'cipher:31308'] + replaced_ids: [] + blocked_pool_ids: ['ohdsi:47', 'ohdsi:59'] + blocked_candidate_reasons: {'ohdsi:47': 'withdrawn', 'ohdsi:59': 'withdrawn'} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:678', 'cipher:31308'] + final_deterministic: {'selected_ids': ['ohdsi:678', 'cipher:31308'], 'matched_llm_ids': ['ohdsi:678'], 'defaulted_ids': ['cipher:31308'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 68: veterans who experienced an abdominal aortic aneurysm + shortlist: ['cipher:29240', 'cipher:29169', 'cipher:15196'] + rec_ids: ['cipher:29240', 'cipher:29169', 'cipher:15196'] + replaced_ids: ['ohdsi:1093'] + blocked_pool_ids: ['ohdsi:1093', 'ohdsi:866'] + blocked_candidate_reasons: {'ohdsi:1093': 'procedure_for_diagnosis_intent', 'ohdsi:866': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:29240', 'cipher:29169', 'cipher:15196'] + final_deterministic: {'selected_ids': ['cipher:29240', 'cipher:29169', 'cipher:15196'], 'matched_llm_ids': ['cipher:29240', 'cipher:29169', 'cipher:15196'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 69: patients with COPD according to diagnostic codes in the EHR + shortlist: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + rec_ids: ['cipher:29794', 'cipher:4241', 'cipher:29756'] + replaced_ids: [] + blocked_pool_ids: ['cipher:16798'] + blocked_candidate_reasons: {'cipher:16798': 'narrow_hospitalization_subtype_for_plain_diagnosis'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + final_deterministic: {'selected_ids': ['cipher:29794', 'cipher:4241', 'cipher:29756'], 'matched_llm_ids': ['cipher:29794', 'cipher:4241', 'cipher:29756'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 
'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 70: patients hospitalized at least once for heart failure + shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + rec_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + final_deterministic: {'selected_ids': ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'], 'matched_llm_ids': ['ohdsi:934', 'ohdsi:1303'], 'defaulted_ids': ['cipher:16152'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + shortlist: ['cipher:30803', 'cipher:31250', 'cipher:31195', 'cipher:30170'] + rec_ids: ['cipher:30803', 'cipher:31250', 'cipher:31195'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30803', 'cipher:31250', 'cipher:31195', 'cipher:30170'] + final_deterministic: {'selected_ids': ['cipher:30803', 'cipher:31250', 'cipher:31195'], 'matched_llm_ids': ['cipher:31250', 'cipher:31195'], 'defaulted_ids': ['cipher:30803'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 1: Patients with an implanted cardiac defibrillator + intent_facets_raw: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Implanted device'], 
'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['patients with cardiac devices']} + intent_facets_effective: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Implanted device'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['patients with cardiac devices']} + planning_shortlist: ['cipher:15146', 'cipher:2288', 'cipher:13288'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac defibrillator as a diagnosis-oriented study intent.', 'Included Cardiac defibrillator in situ (gwPheWAS) as a medication based candidate focused on Cardiac Defibrillator.', 'Included Cardiac defibrillator in situ (MAP) as a outcome candidate focused on Cardiac defibrillator in situ.', 'Included Cardiac Defibrillator in Situ (Phecode) as a comorbidity covariate candidate focused on Cardiac Defibrillator in Situ.'] + recommendations: + cipher:15146 | Cardiac defibrillator in situ (gwPheWAS) | This phenotype directly addresses the study intent of patients with an implanted cardiac defibrillator based on ICD codes used in the Million Veteran Program. + cipher:2288 | Cardiac defibrillator in situ (MAP) | Identifies patients classified as having a cardiac defibrillator in situ based on a MAP probability cutoff. + cipher:13288 | Cardiac Defibrillator in Situ (Phecode) | This phenotype defines the presence of a cardiac defibrillator in situ, using a Phecode grouping from J. Denny’s "Phecode Map". 
+ +CASE 2: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:14029 | Fasciitis (Phecode) | This phenotype is defined using the Phecode mapping system, directly representing a diagnosis of Fasciitis based on ICD codes. + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS. + cipher:2703 | Fasciitis (MAP) | This phenotype identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold. Use with caution due to the unsupervised nature of the MAP algorithm. 
+ +CASE 3: Patients with acute prostatitis + intent_facets_raw: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['acute prostatitis'], 'care_setting_cues': ['inpatient', 'outpatient'], 'population_cues': ['acute']} + intent_facets_effective: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['acute prostatitis'], 'care_setting_cues': ['inpatient', 'outpatient'], 'population_cues': ['acute']} + planning_shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:3500'] + planning_reasoning: ['Selected shortlisted candidates align with prostatitis as a diagnosis-oriented study intent.', 'Included [P] Prostatitis as a diagnosis candidate focused on Prostatitis.', 'Included Acute Prostatitis (Phecode) as a diagnosis candidate focused on Acute Prostatitis.', 'Included Prostatitis (MAP) as a diagnosis candidate focused on Prostatitis.'] + recommendations: + ohdsi:283 | [P] Prostatitis | This phenotype aligns directly with the study intent of patients with acute prostatitis, capturing both chronic and acute forms. + cipher:13720 | Acute Prostatitis (Phecode) | This ICD-based phenotype is a suitable representation of acute prostatitis, leveraging existing coding practices. + cipher:3500 | Prostatitis (MAP) | The MAP phenotype provides a probabilistic assessment of prostatitis based on a complex algorithm, offering an alternative approach to diagnosis. 
+ +CASE 4: Patients who underwent esophagectomy + intent_facets_raw: {'condition_or_topic': 'Esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'surgical patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Surgical procedure'], 'care_setting_cues': ['Hospital'], 'population_cues': ['Patients undergoing surgery']} + intent_facets_effective: {'condition_or_topic': 'Esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'surgical patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Surgical procedure'], 'care_setting_cues': ['Hospital'], 'population_cues': ['Patients undergoing surgery']} + planning_shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:1309', 'ohdsi:870'] + planning_reasoning: ['Selected shortlisted candidates align with Esophagectomy as a procedure-oriented study intent.', 'Included [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults, post op new Afib as a procedure candidate focused on Esophagectomy.'] + recommendations: + ohdsi:1097 | [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib | This phenotype directly reflects the study intent of patients who underwent esophagectomy and developed postoperative atrial fibrillation. + ohdsi:1294 | [P] Esophagectomy, adults | This phenotype represents the esophagectomy procedure itself, a core element of the study's focus. 
+ ohdsi:1309 | [P] Esophagectomy, adults, post op new Afib | This phenotype specifically identifies patients with esophagectomy who developed postoperative atrial fibrillation, complementing the primary phenotype. + +CASE 5: Patients diagnosed with peripheral neuritis + intent_facets_raw: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + planning_reasoning: ['Selected shortlisted candidates align with peripheral neuritis as a diagnosis-oriented study intent.', 'Included [P][R] Peripheral neuritis as a diagnosis candidate focused on Peripheral neuritis.', 'Included [P] Peripheral Neuropathy or Neuritits as a diagnosis candidate focused on Peripheral Neuropathy.', 'Included [P][R] Optic neuritis as a diagnosis candidate focused on Optic neuritis.', 'Near-duplicate topical variants were removed to preserve distinct recommendation coverage: ohdsi:540.'] + recommendations: + ohdsi:388 | [P][R] Peripheral neuritis | This phenotype represents the diagnosis of peripheral neuritis, aligning directly with the study intent. + ohdsi:389 | [P] Peripheral Neuropathy or Neuritits | Identifies the first occurrence of peripheral neuropathy or neuritis, a relevant condition to the study's focus. 
+ ohdsi:238 | [P][R] Optic neuritis | Captures diagnosis of optic neuritis, potentially related to the broader neurological manifestations of peripheral nerve issues. + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + intent_facets_raw: {} + intent_facets_effective: {'clinical_topic_aliases': []} + planning_shortlist: ['ohdsi:760', 'ohdsi:757', 'ohdsi:1057'] + planning_reasoning: ['Selected shortlisted candidates align with the requested clinical topic as a phenotype-oriented study intent.', 'Included [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on IL-23 Inhibitors.', 'Included [P] Concomitant TNF - alpha Inhibitors and IL23 Inhibitors - GE 30D overlap as a medication based candidate focused on TNF-alpha Inhibitors, IL23 Inhibitors.', 'Included [P] New users of IL-23 inhibitors nested in Plaque psoriasis as a medication based candidate focused on IL-23 inhibitors.'] + recommendations: + ohdsi:760 | [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap | This phenotype directly addresses the study intent of concomitant IL-23 and IL-12/23 inhibitor exposure. + ohdsi:757 | [P] Concomitant TNF - alpha Inhibitors and IL23 Inhibitors - GE 30D overlap | This phenotype directly addresses the study intent of concomitant TNF-alpha and IL23 inhibitor exposure. + ohdsi:1057 | [P] New users of IL-23 inhibitors nested in Plaque psoriasis | This phenotype focuses on IL-23 inhibitor use, relevant to patients with plaque psoriasis, which aligns with potential inflammatory conditions of interest. 
+ +CASE 7: Patients with allergic rhinitis + intent_facets_raw: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + planning_reasoning: ['Selected shortlisted candidates align with allergic rhinitis as a diagnosis-oriented study intent.', 'Included [P][R] Allergic rhinitis as a diagnosis candidate focused on Allergic rhinitis.', 'Included [P] Allergic Rhinitis as a diagnosis candidate focused on Allergic Rhinitis.', 'Included Allergic rhinitis (MAP) as a diagnosis candidate focused on Allergic Rhinitis.'] + recommendations: + ohdsi:508 | [P][R] Allergic rhinitis | This phenotype represents the diagnosis of Allergic rhinitis, aligning directly with the study intent of patients with allergic rhinitis. + ohdsi:367 | [P] Allergic Rhinitis | This phenotype represents events of Allergic Rhinitis, primarily intended for diagnosing this condition, a relevant consideration for the study intent. + cipher:2081 | Allergic rhinitis (MAP) | This phenotype identifies patients with Allergic Rhinitis based on a MAP clustering algorithm and a probability cutoff, suitable for investigation. 
+ +CASE 8: Patients with ischemic heart disease + intent_facets_raw: {'condition_or_topic': 'ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart condition'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart condition'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + planning_shortlist: ['ohdsi:654', 'cipher:16261', 'cipher:29560', 'cipher:29218'] + planning_reasoning: ['Selected shortlisted candidates align with ischemic heart disease as a diagnosis-oriented study intent.', 'Included [P][R] Ischemic heart disease as a diagnosis candidate focused on Ischemic heart disease.', 'Included Ischemic Heart Disease (Sandhu) as a diagnosis candidate focused on Ischemic Heart Disease.', 'Included Ischemic Heart Disease (HDR UK) as a diagnosis candidate focused on Ischemic Heart Disease.'] + recommendations: + ohdsi:654 | [P][R] Ischemic heart disease | This phenotype represents the diagnosis of Ischemic heart disease, directly aligning with the study intent. + cipher:16261 | Ischemic Heart Disease (Sandhu) | This phenotype identifies patients with IHD based on ICD-10 codes, a relevant diagnostic approach for ischemic heart disease. 
+ cipher:29560 | Ischemic Heart Disease (HDR UK) | This phenotype identifies patients diagnosed with Ischemic Heart Disease based on NLP analysis of EHR data, reflecting a diagnostic approach. + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + intent_facets_raw: {'condition_or_topic': 'Hemorrhage in early pregnancy or threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Pregnancy'], 'care_setting_cues': ['Any care setting'], 'population_cues': ['Pregnant women']} + intent_facets_effective: {'condition_or_topic': 'Hemorrhage in early pregnancy or threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Pregnancy'], 'care_setting_cues': ['Any care setting'], 'population_cues': ['Pregnant women']} + planning_shortlist: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + planning_reasoning: ['Selected shortlisted candidates align with Hemorrhage in early pregnancy or threatened labor as a diagnosis-oriented study intent.', 'Included Early or threatened labor; hemorrhage in early pregnancy (MAP) as a complication candidate focused on Early or threatened labor; hemorrhage in early pregnancy.', 'Included Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) as a complication candidate focused on Early Labor Hemorrhage.', 'Included Hemorrhage in early pregnancy (MAP) as a diagnosis candidate focused on Hemorrhage in early pregnancy.'] + recommendations: + cipher:2643 | Early or threatened labor; hemorrhage in early pregnancy (MAP) | This phenotype directly 
addresses the study intent of 'Pregnant patients with hemorrhage in early pregnancy or threatened labor' using a MAP clustering approach. + cipher:13824 | Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) | This Phecode phenotype is also a relevant representation of early or threatened labor hemorrhage in early pregnancy, providing another option based on ICD code mapping. + cipher:2798 | Hemorrhage in early pregnancy (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 10: Patients who underwent lung resection + intent_facets_raw: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital setting', 'clinical care'], 'population_cues': ['patient population', 'clinical patients']} + intent_facets_effective: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital setting', 'clinical care'], 'population_cues': ['patient population', 'clinical patients']} + planning_shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + planning_reasoning: ['Selected shortlisted candidates align with lung resection as a procedure-oriented study intent.', 'Included [P] Lung Resection, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, post op new Afib as a procedure candidate focused on Lung 
Resection.', 'Included [P] Lung Resection, adults, inpt stay, no ED as a procedure candidate focused on Lung Resection.'] + recommendations: + ohdsi:1268 | [P] Lung Resection, adults, inpt stay, no ED, post op new Afib | This phenotype directly aligns with the study intent of patients who underwent lung resection and developed new atrial fibrillation post-operatively. + ohdsi:1308 | [P] Lung Resection, adults, post op new Afib | This phenotype represents lung resection and the subsequent development of new atrial fibrillation, a common complication post-surgery. + ohdsi:869 | [P] Lung Resection, adults, inpt stay, no ED | This phenotype captures the procedure of lung resection, associated with inpatient stays, which is relevant to the study intent. + +CASE 11: Patients with laryngitis + intent_facets_raw: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Patients']} + planning_shortlist: ['ohdsi:355', 'cipher:15233', 'cipher:2046', 'cipher:2360'] + planning_reasoning: ['Selected shortlisted candidates align with laryngitis as a diagnosis-oriented study intent.', 'Included [P] Laryngitis as a diagnosis candidate focused on Laryngitis.', 'Included Acute laryngitis and tracheitis (gwPheWAS) as a diagnosis 
candidate focused on Acute laryngitis and tracheitis.', 'Included Acute laryngitis and tracheitis (MAP) as a diagnosis candidate focused on Acute laryngitis and tracheitis.'] + recommendations: + ohdsi:355 | [P] Laryngitis | This phenotype directly represents the diagnosis of Laryngitis, aligning with the study intent of patients with laryngitis. + cipher:15233 | Acute laryngitis and tracheitis (gwPheWAS) | This phenotype represents a coded definition of Acute laryngitis and tracheitis, which is relevant to studying patients with laryngitis symptoms. + cipher:2046 | Acute laryngitis and tracheitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 12: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.', 'Included Regional enteritis (gwPheWAS) as a outcome candidate focused on Regional Enteritis.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional 
enteritis.'] + recommendations: + cipher:13571 | Regional Enteritis (Phecode) | This phenotype is defined using the Phecode grouping for Regional Enteritis, based on ICD-9 and ICD-10 codes, aligning directly with the study intent. + cipher:15376 | Regional enteritis (gwPheWAS) | This phenotype defines Regional Enteritis based on ICD codes used in the Million Veteran Program phenome-wide GWAS, a relevant outcome measure. + cipher:3534 | Regional enteritis (MAP) | This phenotype identifies patients with Regional Enteritis based on a MAP algorithm probability cutoff of 0.45, a valid diagnostic approach. + +CASE 13: Patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'cipher:3541', 'cipher:17322'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis 
candidate focused on Renal Sclerosis.', 'Included Renal sclerosis, NOS (MAP) as a comorbidity covariate candidate focused on Renal Sclerosis.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This phenotype definition uses the Phecode system, which maps ICD codes to clinically relevant chronic kidney disease characterized by renal sclerosis. It aligns directly with the study intent of iden + cipher:13656 | Renal Sclerosis NOS (Phecode) | This phenotype definition using the Phecode system focuses on renal sclerosis, aligning with the study intent. It uses ICD-9 and ICD-10 codes. + cipher:3541 | Renal sclerosis, NOS (MAP) | This phenotype is derived from the MAP algorithm, which identifies renal sclerosis based on ICD codes with a probability cutoff of 0.6. It's a useful covariate consideration. + +CASE 14: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart condition'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart condition'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + planning_shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented 
study intent.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.'] + recommendations: + cipher:30192 | Other Cardiomyopathy (HDR UK) | This phenotype directly aligns with the study intent of patients with cardiomyopathy. + cipher:31252 | Cardiomyopathy (Knight) | This phenotype also aligns with the study intent of patients with cardiomyopathy. + ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype represents Takotsubo cardiomyopathy, a recognized type of cardiomyopathy. + +CASE 15: Patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | The study intent is for patients with a diagnosis of PRES. 
This phenotype directly represents that diagnosis. + +CASE 16: Patients with anorexia nervosa + intent_facets_raw: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of anorexia nervosa'], 'care_setting_cues': ['outpatient care', 'inpatient care'], 'population_cues': ['patients with anorexia nervosa']} + intent_facets_effective: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['diagnosis of anorexia nervosa'], 'care_setting_cues': ['outpatient care', 'inpatient care'], 'population_cues': ['patients with anorexia nervosa']} + planning_shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + planning_reasoning: ['Selected shortlisted candidates align with anorexia nervosa as a diagnosis-oriented study intent.', 'Included [P] Anorexia Nervosa as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia Nervosa (VADC) as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia nervosa (MAP) as a diagnosis candidate focused on Anorexia Nervosa.'] + recommendations: + ohdsi:1340 | [P] Anorexia Nervosa | This phenotype represents the diagnosis of Anorexia Nervosa, defined as events lasting 30 days, collapsed within 1 year. It aligns directly with the study intent of patients with anorexia nervosa. + cipher:17187 | Anorexia Nervosa (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ cipher:2117 | Anorexia nervosa (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 17: Patients with dizziness, vertigo, or motion sickness + intent_facets_raw: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + planning_reasoning: ['Selected shortlisted candidates align with dizziness, vertigo, motion sickness as a diagnosis-oriented study intent.', 'Included Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) as a diagnosis candidate focused on Dizziness and Giddiness (Lightheadedness and Vertigo).', 'Included [P] Vertigo as a diagnosis candidate focused on Vertigo.', 'Included Peripheral or central vertigo (MAP) as a diagnosis candidate focused on Vertigo.'] + recommendations: + cipher:13215 | Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) | This phenotype directly addresses the user's stated intent of diagnosing patients with dizziness, vertigo, or motion sickness using the Phecode system. + ohdsi:893 | [P] Vertigo | This phenotype represents a condition of vertigo, aligning with the user's interest in dizziness and vertigo. 
+ cipher:3402 | Peripheral or central vertigo (MAP) | This phenotype identifies patients with vertigo based on a MAP clustering algorithm, a relevant approach to diagnosing this condition. + +CASE 18: Patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['adult patients']} + planning_shortlist: ['cipher:30277', 'cipher:13992', 'cipher:3460'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (MAP) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD-10 codes and is intended for identifying patients with Polymyalgia Rheumatica. + cipher:13992 | Polymyalgia Rheumatica (Phecode) | This phenotype uses Phecode mapping of ICD-9 and ICD-10 codes for Polymyalgia Rheumatica, suitable for diagnosis. 
+ cipher:3460 | Polymyalgia Rheumatica (MAP) | This phenotype provides a probability-based prediction of Polymyalgia Rheumatica based on ICD code counts, useful as a comorbidity covariate. + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + intent_facets_raw: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'adverse event|medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['adverse event'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'adverse event|medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['adverse event'], 'care_setting_cues': ['any'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:2064', 'cipher:2123', 'cipher:3573', 'cipher:2915'] + planning_reasoning: ['Selected shortlisted candidates align with corticosteroid adverse effects as a adverse event|medication based-oriented study intent.', 'Included Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Adrenal Cortical Steroids Adverse Effects.', 'Included Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Lipid-Lowering Drug Adverse Effects.', 'Included Salicylates causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Salicylates adverse effects.'] + recommendations: + cipher:2064 | Adrenal cortical steroids causing adverse effects in therapeutic use 
(MAP) | This phenotype directly addresses the study intent of patients with adverse effects from therapeutic corticosteroid use, as defined by the MAP clustering algorithm and ICD code utilization. + cipher:2123 | Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + cipher:3573 | Salicylates causing adverse effects in therapeutic use (MAP) | Salicylates are a type of corticosteroid, and this phenotype identifies patients experiencing adverse effects from them, aligning with the study’s focus on corticosteroid-related adverse events. + +CASE 20: Patients with low blood pressure + intent_facets_raw: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + planning_reasoning: ['Selected shortlisted candidates align with Hypotension as a diagnosis-oriented study intent.', 'Included [P][R] Low blood pressure as a severity candidate focused on Hypotension.', 'Included [P] Hypotension as a severity candidate focused on Hypotension.', 'Included Hypotension Nos (Phecode) as a diagnosis candidate focused on Hypotension.'] + recommendations: + ohdsi:339 | [P][R] Low blood pressure | This phenotype directly 
reflects the study intent of identifying patients with low blood pressure. It is a severity measure related to hypotension. + ohdsi:890 | [P] Hypotension | This phenotype represents a severity measure of low blood pressure and aligns with the study intent. + cipher:13390 | Hypotension Nos (Phecode) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 21: Patients with encephalopathy + intent_facets_raw: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'general patient population', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient diagnosis'], 'care_setting_cues': ['Hospital patient care', 'Clinic patient care'], 'population_cues': ['All patients']} + intent_facets_effective: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'general patient population', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient diagnosis'], 'care_setting_cues': ['Hospital patient care', 'Clinic patient care'], 'population_cues': ['All patients']} + planning_shortlist: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223', 'cipher:2664'] + planning_reasoning: ['Selected shortlisted candidates align with encephalopathy as a diagnosis-oriented study intent.', 'Included [P] Encephalopathy or its presentations as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Encephalopathy as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:194 | 
[P] Encephalopathy or its presentations | This phenotype directly addresses the study intent of identifying patients with encephalopathy, encompassing various presentations and underlying causes. + ohdsi:331 | [P] Encephalopathy | This phenotype represents the clinical diagnosis of Encephalopathy, a neurological condition characterized by a range of symptoms, aligning with the study's focus. + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | This phenotype represents the clinical diagnosis of Posterior reversible encephalopathy syndrome (PRES) and is relevant to encephalopathy-related research. + +CASE 22: Patients with birdshot chorioretinitis + intent_facets_raw: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1223'] + planning_reasoning: ['Selected shortlisted candidates align with birdshot chorioretinitis as a diagnosis-oriented study intent.', 'Included [P] Birdshot chorioretinitis as a diagnosis candidate focused on Uveitis.'] + recommendations: + ohdsi:1223 | [P] Birdshot chorioretinitis | This phenotype directly addresses the study intent of identifying patients with birdshot chorioretinitis, a form of uveitis. 
+ +CASE 23: Older adults with macular degeneration + intent_facets_raw: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + planning_shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + planning_reasoning: ['Selected shortlisted candidates align with macular degeneration as a diagnosis-oriented study intent.', 'Included Macular Degeneration (HDR UK) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration (senile) of retina NOS (MAP) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration, dry (MAP) as a diagnosis candidate focused on Macular Degeneration.'] + recommendations: + cipher:30295 | Macular Degeneration (HDR UK) | This phenotype directly identifies patients with a diagnosis or history of Macular Degeneration based on ICD10 codes, Med Codes, and Read codes v2, aligning with the study intent of older adults with + cipher:3006 | Macular degeneration (senile) of retina NOS (MAP) | This phenotype uses a MAP algorithm to identify patients with Macular Degeneration, aligning with the study intent and utilizing a probabilistic approach. 
+ cipher:3005 | Macular degeneration, dry (MAP) | This phenotype identifies patients with dry macular degeneration using a MAP algorithm, another relevant approach for diagnosing the condition in older adults. + +CASE 24: Patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | This phenotype directly addresses the study intent of identifying patients with Warm Autoimmune Hemolytic Anemia. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype is a broader definition of Autoimmune hemolytic anemia, suitable for inclusion alongside the earliest diagnosis. 
+ cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype offers an alternative, well-established definition of Autoimmune hemolytic anemia. + +CASE 25: Patients with MSI-low rectal adenocarcinoma + intent_facets_raw: {'condition_or_topic': 'colorectal cancer', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary cancer diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'colorectal cancer', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary cancer diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + planning_shortlist: [] + planning_reasoning: ['Selected shortlisted candidates align with colorectal cancer as a diagnosis-oriented study intent.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:831, ohdsi:823, ohdsi:819.'] + recommendations: + +CASE 26: Patients with blistering skin lesions + intent_facets_raw: {'condition_or_topic': 'Blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 
'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:652', 'ohdsi:376'] + planning_reasoning: ['Selected shortlisted candidates align with Blistering skin lesions as a diagnosis-oriented study intent.', 'Included [P][R] Vasculitis of the skin as a diagnosis candidate focused on Vasculitis of the skin.', 'Included [P][R] Bleeding skin as a outcome candidate focused on Bleeding Skin.'] + recommendations: + ohdsi:652 | [P][R] Vasculitis of the skin | This phenotype represents events of Vasculitis of the skin, which aligns with the study intent of patients with blistering skin lesions. + ohdsi:376 | [P][R] Bleeding skin | Bleeding skin is a relevant finding related to blistering skin lesions and can be considered in the context of the study intent. + +CASE 27: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['outpatient', 'inpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['outpatient', 'inpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with 
Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype directly addresses the study intent of patients with stomatitis or mucositis. + cipher:15333 | Stomatitis and mucositis (gwPheWAS) | This phenotype also directly addresses stomatitis and mucositis and is derived from a GWAS study. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 28: Patients with neurofibromatosis type 1 + intent_facets_raw: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + planning_reasoning: ['Selected shortlisted candidates align with neurofibromatosis type 1 as a diagnosis-oriented study intent.', 'Included [P][R] Neurofibromatosis type 1 as a diagnosis candidate 
focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 without Type 2 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.'] + recommendations: + ohdsi:697 | [P][R] Neurofibromatosis type 1 | This phenotype represents the diagnosis of Neurofibromatosis type 1 (NF1), aligning directly with the study intent. + ohdsi:304 | Neurofibromatosis type 1 (FP) | This phenotype represents the diagnosis of Neurofibromatosis type 1 (NF1) and is a relevant consideration given the study's focus. + ohdsi:305 | Neurofibromatosis type 1 without Type 2 (FP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 29: Patients with keloid scars + intent_facets_raw: {'condition_or_topic': 'Keloid Scar', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Keloid Scar', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + planning_reasoning: ['Selected shortlisted candidates align with Keloid Scar as a diagnosis-oriented study intent.', 'Included Keloid Scar (Phecode) as a diagnosis candidate focused on Keloid Scar.', 'Included Keloid scar (gwPheWAS) as a comorbidity covariate candidate focused on Keloid Scar.', 'Included Keloid scar (MAP) as a comorbidity covariate 
candidate focused on Keloid Scar.'] + recommendations: + cipher:13930 | Keloid Scar (Phecode) | This phenotype is defined using the Phecode grouping, representing a diagnosis of Keloid Scar based on ICD-9 and ICD-10 codes, aligning directly with the study intent. + cipher:15610 | Keloid scar (gwPheWAS) | This phenotype represents a comorbidity/covariate related to Keloid Scar, derived from the Million Veteran Program phenome-wide GWAS study, suitable for exploring relationships with other outcomes. + cipher:2950 | Keloid scar (MAP) | This phenotype utilizes the MAP algorithm to identify Keloid Scar based on ICD code probabilities, offering a probabilistic approach to defining the condition. + +CASE 30: Patients with acetaminophen exposure + intent_facets_raw: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1428', 'ohdsi:1158'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen exposure as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.', 'Included [P] Acamprosate, all exposures as a medication based candidate focused on Acamprosate Exposure.', 'Included [P] Disulfiram, all 
exposures as a medication based candidate focused on Disulfiram Exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype directly addresses the study intent of 'Patients with acetaminophen exposure' and captures exposure with a 30-day persistence window. + ohdsi:1427 | [P] Acamprosate, all exposures | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1428 | [P] Disulfiram, all exposures | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 31: Patients exposed to rifamycin antibiotics + intent_facets_raw: {'condition_or_topic': 'antibiotic exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'antibiotic exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1206'] + planning_reasoning: ['Selected shortlisted candidates align with antibiotic exposure as a medication based-oriented study intent.', 'Included [P] Antibiotics Macrolides 10 as a medication based candidate focused on Macrolide Drug Exposure.'] + recommendations: + ohdsi:1206 | [P] Antibiotics Macrolides 10 | This phenotype represents exposure to macrolide antibiotics, aligning with the study intent of patients exposed to rifamycin antibiotics. 
+ +CASE 32: Patients with a joint or ligament sprain + intent_facets_raw: {'condition_or_topic': 'joint or ligament sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['patients with sprain']} + intent_facets_effective: {'condition_or_topic': 'joint or ligament sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['patients with sprain']} + planning_shortlist: ['cipher:14236', 'cipher:15814', 'ohdsi:363', 'cipher:2944'] + planning_reasoning: ['Selected shortlisted candidates align with joint or ligament sprain as a diagnosis-oriented study intent.', 'Included Joint Ligament Sprain (Phecode) as a diagnosis candidate focused on Joint Ligament Sprain.', 'Included Joint/ligament sprain (gwPheWAS) as a comorbidity covariate candidate focused on Joint Ligament Sprain.', 'Included [P][R] Joint stiffness as a diagnosis candidate focused on Joint stiffness.'] + recommendations: + cipher:14236 | Joint Ligament Sprain (Phecode) | Directly reflects the study intent of patients with a joint or ligament sprain, utilizing a Phecode definition. + cipher:15814 | Joint/ligament sprain (gwPheWAS) | Another Phecode definition relevant to joint and ligament sprains, utilized in the Million Veteran Program. + ohdsi:363 | [P][R] Joint stiffness | Represents joint stiffness, a related condition that aligns with the study intent of joint or ligament sprain. 
+ +CASE 33: Pregnant patients with miscarriage or stillbirth + intent_facets_raw: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant_patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy', 'pregnant_patients']} + intent_facets_effective: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant_patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy', 'pregnant_patients']} + planning_shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + planning_reasoning: ['Selected shortlisted candidates align with miscarriage|stillbirth as a outcome-oriented study intent.', 'Included [P][R] Miscarriage as a outcome candidate focused on Miscarriage.', 'Included [P] Still birth as a outcome candidate focused on Stillbirth.', 'Included Miscarriage; stillbirth (MAP) as a outcome candidate focused on Miscarriage; Stillbirth.'] + recommendations: + ohdsi:627 | [P][R] Miscarriage | This phenotype directly represents the event of miscarriage, aligning with the study intent of 'Pregnant patients with miscarriage or stillbirth'. + ohdsi:1432 | [P] Still birth | This phenotype represents stillbirth, aligning with the study intent of 'Pregnant patients with miscarriage or stillbirth'. + cipher:3056 | Miscarriage; stillbirth (MAP) | This phenotype identifies patients with a confirmed diagnosis of miscarriage or stillbirth derived from MAP unsupervised clustering, which aligns with the study intent. 
+ +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + intent_facets_raw: {'condition_or_topic': 'arterial embolism or thrombosis of a lower extremity artery', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['lower extremity artery'], 'care_setting_cues': ['inpatient', 'outpatient'], 'population_cues': ['lower extremity']} + intent_facets_effective: {'condition_or_topic': 'arterial embolism or thrombosis of a lower extremity artery', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['lower extremity artery'], 'care_setting_cues': ['inpatient', 'outpatient'], 'population_cues': ['lower extremity']} + planning_shortlist: ['cipher:13354', 'cipher:31819'] + planning_reasoning: ['Selected shortlisted candidates align with arterial embolism or thrombosis of a lower extremity artery as a diagnosis-oriented study intent.', 'Included Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) as a complication candidate focused on Arterial Embolism and Thrombosis.', 'Included Embolism or thrombosis of Arteries of Upper Extremities (Allara) as a complication candidate focused on Arterial Embolism, Upper Extremity.'] + recommendations: + cipher:13354 | Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) | This phenotype directly reflects the study intent: Patients with arterial embolism or thrombosis of a lower extremity artery. 
+ cipher:31819 | Embolism or thrombosis of Arteries of Upper Extremities (Allara) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + intent_facets_raw: {'condition_or_topic': 'urinary tract infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'urinary tract infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1301', 'ohdsi:410', 'cipher:31223'] + planning_reasoning: ['Selected shortlisted candidates align with urinary tract infection as a diagnosis-oriented study intent.', 'Included [P] Acute Urinary tract infections UTI events as a diagnosis candidate focused on Urinary Tract Infection.', 'Included [P] Acute Urinary tract infections UTI as a diagnosis candidate focused on Urinary Tract Infection.', 'Included Urinary Tract Infection (Palin) as a diagnosis candidate focused on Urinary Tract Infection.'] + recommendations: + ohdsi:1301 | [P] Acute Urinary tract infections UTI events | This phenotype directly identifies patients with acute urinary tract infections, aligning with the study intent of new users of cephalosporins. It's a diagnosis-based phenotype. 
+ ohdsi:410 | [P] Acute Urinary tract infections UTI | This phenotype also identifies patients with a confirmed or suspected infection of the urinary tract, making it a relevant consideration given the study's focus on UTI. + cipher:31223 | Urinary Tract Infection (Palin) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 36: Patients hospitalized with preinfarction syndrome + intent_facets_raw: {'condition_or_topic': 'Preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient hospitalized'], 'care_setting_cues': ['Inpatient setting'], 'population_cues': ['Hospitalized patients']} + intent_facets_effective: {'condition_or_topic': 'Preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient hospitalized'], 'care_setting_cues': ['Inpatient setting'], 'population_cues': ['Hospitalized patients']} + planning_shortlist: ['ohdsi:1081'] + planning_reasoning: ['Selected shortlisted candidates align with Preinfarction syndrome as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype directly addresses Acute Myocardial Infarction, a key diagnosis in the study intent of patients with preinfarction syndrome. 
+ +CASE 37: Patients with a personal history of blood or blood-forming organ disease + intent_facets_raw: {'condition_or_topic': 'blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with personal history', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with personal history', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:3412', 'cipher:17130'] + planning_reasoning: ['Selected shortlisted candidates align with blood or blood-forming organ disease as a diagnosis-oriented study intent.', 'Included Personal history of diseases of blood and blood-forming organs (MAP) as a comorbidity covariate candidate focused on Personal history of diseases of blood and blood-forming organs.', 'Included Personal History of Diseases of Blood and Bloodforming Organs (VADC) as a comorbidity covariate candidate focused on Personal History of Blood and Bloodforming Organ Diseases.'] + recommendations: + cipher:3412 | Personal history of diseases of blood and blood-forming organs (MAP) | This phenotype identifies patients based on a MAP clustering algorithm applied to ICD codes, representing a personal history of blood disorders. 
It aligns directly with the study intent of patients w + cipher:17130 | Personal History of Diseases of Blood and Bloodforming Organs (VADC) | This phenotype represents a personal history of blood and bloodforming organ diseases, captured as a comorbidity covariate. It is a well-established phenotype definition from the VA Data Commons and a + +CASE 38: Patients with benign pancreatic conditions + intent_facets_raw: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + planning_reasoning: ['Selected shortlisted candidates align with pancreatic conditions as a diagnosis-oriented study intent.', 'Included Pancreas Transplant Recipient (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Conditions.', 'Included Extrahepatic Cholangiocarcinoma (Nguyen) as a diagnosis candidate focused on Pancreatic Cancer.', 'Included Chronic Pancreatitis (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Inflammation.'] + recommendations: + cipher:16954 | Pancreas Transplant Recipient (Nguyen) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. 
+ cipher:16952 | Extrahepatic Cholangiocarcinoma (Nguyen) | This phenotype identifies patients with pancreatic cancer based on ICD-10 codes, aligning with the study intent of investigating prevalence in veterans. + cipher:16953 | Chronic Pancreatitis (Nguyen) | Identifies veterans with pancreatic inflammation as a covariate, relevant to investigating pancreatic cancer prevalence. + +CASE 39: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Thumb Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff, aligning with the study intent of identifying patients with primary localized osteoarthritis. 
+ cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis, specifically focusing on finger OA, which is relevant to osteoarthritis. + cipher:4029 | Thumb Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis of the thumb, which is relevant to osteoarthritis. + +CASE 40: New users of dihydropyridine calcium channel blockers + intent_facets_raw: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + planning_reasoning: ['Selected shortlisted candidates align with dihydropyridine calcium channel blockers as a medication based-oriented study intent.', 'Included [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of dihydropyridine calcium channel blockers as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of Beta blockers nested in Acute Myocardial Infarction as a medication based candidate focused on Beta Blockers.'] + recommendations: + ohdsi:1047 | [P] New users 
of dihydropyridine calcium channel blockers nested in essential hypertension | This phenotype directly addresses the study intent of identifying patients newly prescribed dihydropyridine calcium channel blockers for hypertension management. + ohdsi:1048 | [P] New users of dihydropyridine calcium channel blockers | This phenotype represents new users of dihydropyridine calcium channel blockers, aligning with the study's focus on identifying individuals starting this medication. + ohdsi:1052 | [P] New users of Beta blockers nested in Acute Myocardial Infarction | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 41: Veteran patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'cipher:17322', 'cipher:15442'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included Renal Sclerosis NOS (VADC) as a comorbidity covariate 
candidate focused on Renal Sclerosis.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This Phecode definition directly addresses renal sclerosis, a core component of the study intent, using ICD-9 and ICD-10 codes. + cipher:13656 | Renal Sclerosis NOS (Phecode) | This Phecode definition is also relevant to the study intent, representing renal sclerosis based on ICD-9 and ICD-10 codes. + cipher:17322 | Renal Sclerosis NOS (VADC) | This phenotype identifies Renal Sclerosis as a comorbidity covariate, based on ICD-9 and ICD-10 codes, aligning with potential investigation of risk factors in veteran patients. + +CASE 42: Veteran patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Rheumatism'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Rheumatism'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + planning_shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica 
(VADC) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD10 codes, Med Codes, and Read codes v2 and aligns with the study intent of identifying patients with Polymyalgia Rheumatica. + cipher:13992 | Polymyalgia Rheumatica (Phecode) | This phenotype definition uses Phecodes, which are clinically relevant groupings of ICD codes, and aligns with the study intent of diagnosing Polymyalgia Rheumatica. + cipher:17453 | Polymyalgia Rheumatica (VADC) | This phenotype represents a comorbidity/covariate defined in the VA Data Commons based on ICD codes, which is relevant to studying Polymyalgia Rheumatica within the Million Veteran Program. + +CASE 43: Veteran patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic 
anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | This phenotype directly addresses the study intent of identifying the earliest diagnosis of Warm Autoimmune Hemolytic Anemia in veteran patients. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype captures patients with Autoimmune hemolytic anemia, which is relevant to the study's focus. + cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype is a standard OHDSI definition of Autoimmune hemolytic anemia and aligns with the study's goals. + +CASE 44: Veteran patients with cardiac complications + intent_facets_raw: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:15819', 'cipher:17258'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac complications as a diagnosis-oriented study intent.', 'Included Complications of cardiac/vascular device, implant, and graft (gwPheWAS) as a complication candidate focused on Cardiac/Vascular Device Complications.', 'Included Cardiac Complications Not Elsewhere Classified 
(VADC) as a complication candidate focused on Cardiac Complications.'] + recommendations: + cipher:15819 | Complications of cardiac/vascular device, implant, and graft (gwPheWAS) | This phenotype directly addresses cardiac complications and is based on the Million Veteran Program phenome-wide GWAS, aligning with the study intent. + cipher:17258 | Cardiac Complications Not Elsewhere Classified (VADC) | This phenotype captures a broad range of cardiac complications from EHR data, providing a comprehensive approach to evaluating the study's focus. + +CASE 45: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:15684', 'cipher:14029', 'cipher:2703'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS, directly relevant to the study 
interest. + cipher:14029 | Fasciitis (Phecode) | This phenotype definition represents a diagnosis of Fasciitis based on ICD-9 and ICD-10 codes, aligning with the study intent of identifying patients with Fasciitis. + cipher:2703 | Fasciitis (MAP) | Identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold; useful for exploring the phenotype given the study’s focus. + +CASE 46: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | Directly addresses the study intent of patients with stomatitis or mucositis. 
+ cipher:15333 | Stomatitis and mucositis (gwPheWAS) | Another phenotype representing stomatitis and mucositis, aligning with the study's focus. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 47: Patients with Barretts esophagus + intent_facets_raw: {'condition_or_topic': 'Barretts esophagus', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Barretts esophagus', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13531', 'cipher:2187', 'cipher:15342'] + planning_reasoning: ['Selected shortlisted candidates align with Barretts esophagus as a diagnosis-oriented study intent.', "Included Barretts Esophagus (Phecode) as a diagnosis candidate focused on Barrett's Esophagus.", "Included Barrett's esophagus (MAP) as a diagnosis candidate focused on Barrett's esophagus.", "Included Barrett's esophagus (gwPheWAS) as a diagnosis candidate focused on Barrett's esophagus."] + recommendations: + cipher:13531 | Barretts Esophagus (Phecode) | This phenotype represents Barrett's Esophagus based on ICD-9 and ICD-10 codes, aligning directly with the study intent. + cipher:2187 | Barrett's esophagus (MAP) | This phenotype identifies Barrett's Esophagus using a MAP algorithm, providing a probability-based classification suitable for the study topic. 
+ cipher:15342 | Barrett's esophagus (gwPheWAS) | This phenotype definition, derived from the Million Veteran Program, provides a clinically relevant definition of Barrett's Esophagus using ICD codes, aligning with the study intent. + +CASE 48: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13571', 'cipher:15376', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.', 'Included Regional enteritis (gwPheWAS) as a outcome candidate focused on Regional Enteritis.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + cipher:13571 | Regional Enteritis (Phecode) | This phenotype is defined using the Phecode grouping for Regional Enteritis, based on ICD-9 and ICD-10 codes, aligning directly with the study intent. + cipher:15376 | Regional enteritis (gwPheWAS) | This phenotype defines Regional Enteritis based on ICD codes used in the Million Veteran Program phenome-wide GWAS, a relevant clinical outcome. 
+ cipher:3534 | Regional enteritis (MAP) | This phenotype identifies patients with Regional enteritis based on a MAP algorithm probability cutoff of 0.45, providing a probabilistic diagnostic approach. + +CASE 49: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Thumb Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff. The study intent is focused on patients with primary localized osteoarthritis. 
+ cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis, specifically focusing on finger OA, which aligns with the study intent of localized osteoarthritis. + cipher:4029 | Thumb Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis of the thumb, aligning with the study intent of localized osteoarthritis. + +CASE 50: Patients with aortic valve disease + intent_facets_raw: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13226', 'cipher:13231', 'cipher:30301', 'cipher:17250'] + planning_reasoning: ['Selected shortlisted candidates align with aortic valve disease as a diagnosis-oriented study intent.', 'Included Aortic Valve Disease (Phecode) as a diagnosis candidate focused on Aortic Valve Disease.', 'Included Nonrheumatic Aortic Valve Disorders (Phecode) as a diagnosis candidate focused on Aortic Valve Disorders.', 'Included Nonrheumatic Aortic Valve Disorders (HDR UK) as a diagnosis candidate focused on Aortic Valve Disorders.'] + recommendations: + cipher:13226 | Aortic Valve Disease (Phecode) | Directly addresses the study intent of patients with aortic valve disease based on ICD-9 and ICD-10 codes. 
+ cipher:13231 | Nonrheumatic Aortic Valve Disorders (Phecode) | Identifies patients with nonrheumatic aortic valve disorders, complementing the primary intent and utilizing Phecode definitions. + cipher:30301 | Nonrheumatic Aortic Valve Disorders (HDR UK) | Provides an alternative diagnosis based on HDR UK criteria, suitable for inclusion in analysis of aortic valve disorders. + +CASE 51: Patients with chronic periodontitis + intent_facets_raw: {'condition_or_topic': 'periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13492', 'cipher:2371', 'cipher:13494', 'cipher:3397'] + planning_reasoning: ['Selected shortlisted candidates align with periodontitis as a diagnosis-oriented study intent.', 'Included Periodontitis (Acute or Chronic) (Phecode) as a diagnosis candidate focused on Periodontitis.', 'Included Chronic periodontitis (MAP) as a diagnosis candidate focused on Chronic Periodontitis.', 'Included Chronic Periodontitis (Phecode) as a diagnosis candidate focused on Chronic Periodontitis.'] + recommendations: + cipher:13492 | Periodontitis (Acute or Chronic) (Phecode) | Directly addresses the study intent of patients with chronic periodontitis using the established Phecode system based on ICD codes. 
+ cipher:2371 | Chronic periodontitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:13494 | Chronic Periodontitis (Phecode) | Another Phecode definition aligned with the study intent, providing an alternative representation of chronic periodontitis based on ICD codes. + +CASE 52: Patients with hypertensive chronic kidney disease + intent_facets_raw: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:923', 'ohdsi:1191', 'ohdsi:964', 'cipher:2846'] + planning_reasoning: ['Selected shortlisted candidates align with hypertensive chronic kidney disease as a diagnosis-oriented study intent.', 'Included [P] Kidney disease as a diagnosis candidate focused on Chronic Kidney Disease.', 'Included [P] Chronic kidney disease or end stage renal disease 10 as a comorbidity covariate candidate focused on Chronic Kidney Disease.', 'Included [P] Chronic kidney disease as a comorbidity covariate candidate focused on Chronic Kidney Disease.'] + recommendations: + ohdsi:923 | [P] Kidney disease | This phenotype represents the initial record of Chronic Kidney Disease, aligning with the study intent of patients with hypertensive chronic kidney disease. 
+ ohdsi:1191 | [P] Chronic kidney disease or end stage renal disease 10 | This phenotype is directly related to chronic kidney disease, a key component of the study intent. + ohdsi:964 | [P] Chronic kidney disease | This phenotype provides a comprehensive definition of chronic kidney disease, fitting the study's focus on hypertensive chronic kidney disease. + +CASE 53: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis related to heart muscle dysfunction'], 'care_setting_cues': ['Hospitalized patients with heart failure', 'Clinic setting for ongoing management'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis related to heart muscle dysfunction'], 'care_setting_cues': ['Hospitalized patients with heart failure', 'Clinic setting for ongoing management'], 'population_cues': ['Adult patients']} + planning_shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.'] + recommendations: + 
cipher:31252 | Cardiomyopathy (Knight) | This HDR UK phenotype identifies patients with Cardiomyopathy based on ICD-10, SNOMED CT and Read Codes v2, which is relevant to the study intent. + ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype directly addresses Takotsubo cardiomyopathy, a specific type of cardiomyopathy, aligning with the study intent. + cipher:30192 | Other Cardiomyopathy (HDR UK) | This HDR UK phenotype identifies patients with a diagnosis of Other Cardiomyopathy based on ICD10 codes and hospitalizations, which is relevant to the study intent. + +CASE 54: Patients with scleritis or episcleritis + intent_facets_raw: {'condition_or_topic': 'scleritis or episcleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'inflammatory eye disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['inflammatory eye disease']} + intent_facets_effective: {'condition_or_topic': 'scleritis or episcleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'inflammatory eye disease', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['inflammatory eye disease']} + planning_shortlist: ['cipher:30069', 'cipher:3581', 'cipher:13186'] + planning_reasoning: ['Selected shortlisted candidates align with scleritis or episcleritis as a diagnosis-oriented study intent.', 'Included Scleritis and Episcleritis (HDR UK) as a diagnosis candidate focused on Scleritis and Episcleritis.', 'Included Scleritis and episcleritis (MAP) as a diagnosis candidate focused on Scleritis and episcleritis.', 'Included Scleritis and Episcleritis (Phecode) as a diagnosis candidate focused on Scleritis 
and Episcleritis.'] + recommendations: + cipher:30069 | Scleritis and Episcleritis (HDR UK) | This phenotype directly addresses the study intent of identifying patients with scleritis and episcleritis, based on HDR UK criteria. + cipher:3581 | Scleritis and episcleritis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:13186 | Scleritis and Episcleritis (Phecode) | This phenotype uses the Phecode mapping, which relies on ICD codes to define Scleritis and Episcleritis, aligning with the study's focus. + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + intent_facets_raw: {'condition_or_topic': 'Carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + planning_reasoning: ['Selected shortlisted candidates align with Carbohydrate transport and metabolism disorder as a diagnosis-oriented study intent.', 'Included Other Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on Carbohydrate Transport and Metabolism Disorders.', 'Included Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on 
Disorders of Carbohydrate Transport and Metabolism.', 'Included Other disorders of carbohydrate transport and metabolism (MAP) as a comorbidity covariate candidate focused on Carbohydrate Transport Metabolism.'] + recommendations: + cipher:12820 | Other Disorders of Carbohydrate Transport and Metabolism (Phecode) | This phenotype is a closely related Phecode representing other disorders of carbohydrate transport and metabolism, also listed as a comorbidity covariate. + cipher:12818 | Disorders of Carbohydrate Transport and Metabolism (Phecode) | This phenotype aligns directly with the study intent, representing a collection of ICD codes related to carbohydrate transport and metabolism, and is described as a comorbidity covariate. + cipher:3256 | Other disorders of carbohydrate transport and metabolism (MAP) | This phenotype represents a comorbidity/covariate based on an unsupervised clustering algorithm (MAP) applied to ICD codes, which may be relevant to studying carbohydrate transport and metabolism diso + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + intent_facets_raw: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital setting', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['inpatient'], 'population_cues': ['hospital setting']} + intent_facets_effective: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital setting', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['inpatient'], 'population_cues': ['hospital setting']} + planning_shortlist: ['ohdsi:1187'] + 
planning_reasoning: ['Selected shortlisted candidates align with acetaminophen as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype represents acetaminophen exposure with a 30-day persistence window, aligning directly with the study intent of patients with acetaminophen exposure in the hospital setting. + +CASE 57: Patients diagnosed with dyschromia and vitiligo + intent_facets_raw: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + planning_reasoning: ['Selected shortlisted candidates align with dyschromia and vitiligo as a diagnosis-oriented study intent.', 'Included Dyschromia and Vitiligo (Phecode) as a diagnosis candidate focused on Dyschromia and Vitiligo.', 'Included Dyschromia and Vitiligo (MAP) as a diagnosis candidate focused on Dyschromia and Vitiligo.', 'Included [P][R] Vitiligo as a diagnosis candidate focused on Vitiligo.'] + recommendations: + cipher:13900 | Dyschromia and Vitiligo (Phecode) | This phenotype defines Dyschromia and Vitiligo based on ICD-9 and ICD-10 codes, aligning with the 
study's focus. + cipher:2628 | Dyschromia and Vitiligo (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + ohdsi:471 | [P][R] Vitiligo | This phenotype represents the diagnosis of Vitiligo, directly addressing the study intent of dyschromia and vitiligo. + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + intent_facets_raw: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['no pre-existing liver disease']} + intent_facets_effective: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['no pre-existing liver disease']} + planning_shortlist: ['ohdsi:716', 'ohdsi:735', 'ohdsi:294', 'ohdsi:293'] + planning_reasoning: ['Selected shortlisted candidates align with acute hepatic injury as a diagnosis-oriented study intent.', 'Included [P] Acute Hepatic Injury as a severity candidate focused on Acute Hepatic Injury.', 'Included [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure as a diagnosis candidate focused on Acute Liver Injury.', 'Included [P] Acute Hepatic Injury with no pre-existing liver disease as a diagnosis candidate focused on Acute Liver Injury.'] + recommendations: + ohdsi:716 | [P] Acute Hepatic Injury | This phenotype directly aligns with the study intent: measuring 
severity of acute liver injury, including toxic liver disease and hepatic failure. + ohdsi:735 | [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure | This phenotype identifies patients with acute liver injury, excluding those with prior chronic hepatic failure, which is relevant to the study intent of excluding pre-existing liver disease. + ohdsi:294 | [P] Acute Hepatic Injury with no pre-existing liver disease | This phenotype specifically targets acute liver injury in patients without pre-existing liver disease, perfectly matching the study's focus. + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + intent_facets_raw: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + planning_reasoning: ['Selected shortlisted candidates align with nerve plexus lesions as a diagnosis-oriented study intent.', 'Included Nerve Plexus Lesions (Phecode) as a diagnosis candidate focused on Nerve Plexus Lesions.', 'Included Nerve Root Lesions (Phecode) as a diagnosis candidate focused on Nerve Root Lesions.', 'Included Nerve plexus lesions (gwPheWAS) as a comorbidity covariate candidate focused on Nerve Plexus Lesions.'] + recommendations: + 
cipher:13084 | Nerve Plexus Lesions (Phecode) | This phenotype directly aligns with the study intent of defining patients with nerve plexus lesions using the PheCode system, which maps ICD codes to clinically relevant phenotypes. + cipher:13085 | Nerve Root Lesions (Phecode) | This phenotype, also defined by PheCode, is closely related to nerve plexus lesions and represents a relevant diagnosis to consider within the study scope. + cipher:14974 | Nerve plexus lesions (gwPheWAS) | This phenotype, derived from the Million Veteran Program, utilizes Phecodes for defining nerve plexus lesions as a comorbidity covariate, aligning with potential research applications. + +CASE 60: patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | The study intent is for patients with a diagnosis of PRES, and this phenotype represents the diagnosis of PRES. 
+ +CASE 61: patients with chronic ulcerative colitis + intent_facets_raw: {'condition_or_topic': 'Ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:860', 'ohdsi:458', 'cipher:30724'] + planning_reasoning: ['Selected shortlisted candidates align with Ulcerative colitis as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included [P][R] Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included Ulcerative Colitis (Kuan) as a diagnosis candidate focused on Ulcerative Colitis.'] + recommendations: + ohdsi:860 | [P] Earliest event of Ulcerative colitis | This phenotype identifies the earliest diagnosis of Ulcerative colitis, which is relevant to the study intent of patients with chronic ulcerative colitis. + ohdsi:458 | [P][R] Ulcerative colitis | This phenotype captures all events of Ulcerative colitis, suitable for studying chronic patients. + cipher:30724 | Ulcerative Colitis (Kuan) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 62: Veteran patients with developmental disorders that are pervasive + intent_facets_raw: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + planning_shortlist: ['cipher:12996', 'cipher:17197', 'cipher:17193', 'cipher:3415'] + planning_reasoning: ['Selected shortlisted candidates align with developmental disorders as a diagnosis-oriented study intent.', 'Included Pervasive Developmental Disorders (Phecode) as a diagnosis candidate focused on Pervasive Developmental Disorders.', 'Included Develomental Delays and Disorders (VADC) as a comorbidity covariate candidate focused on Developmental Delays and Disorders.', 'Included Pervasive Developmental Disorders (VADC) as a comorbidity covariate candidate focused on Pervasive Developmental Disorders.'] + recommendations: + cipher:12996 | Pervasive Developmental Disorders (Phecode) | This phenotype directly aligns with the study intent of identifying patients with Pervasive Developmental Disorders based on ICD codes. It is defined using the Phecode grouping, a clinically relevant + cipher:17197 | Develomental Delays and Disorders (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. 
+ cipher:17193 | Pervasive Developmental Disorders (VADC) | This phenotype is another definition of Pervasive Developmental Disorders utilizing ICD codes, consistent with the user's intent and mirroring the VA Data Commons' approach. + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + intent_facets_raw: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'multiple diagnoses', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses of a condition'], 'care_setting_cues': ['inpatient care', 'emergency department care'], 'population_cues': ['patients with multiple diagnoses']} + intent_facets_effective: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'multiple diagnoses', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses of a condition'], 'care_setting_cues': ['inpatient care', 'emergency department care'], 'population_cues': ['patients with multiple diagnoses']} + planning_shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + planning_reasoning: ['Selected shortlisted candidates align with acute myocardial infarction as a diagnosis-oriented study intent.', 'Included [P][R] Acute myocardial infarction as a diagnosis candidate focused on Acute myocardial infarction.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.', 'Included Acute Myocardial Infarction (VA CAUSAL Methods) as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:510 | [P][R] 
Acute myocardial infarction | This phenotype identifies patients with Acute myocardial infarction, aligning directly with the study intent of identifying patients with at least two recorded diagnoses of the condition. + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype identifies patients with Acute Myocardial Infarction, a critical diagnosis in cardiology, and satisfies the study's requirement for at least two recorded diagnoses. + cipher:18982 | Acute Myocardial Infarction (VA CAUSAL Methods) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:632', 'ohdsi:781'] + planning_reasoning: ['Selected shortlisted candidates align with antiphospholipid syndrome as a diagnosis-oriented study intent.', 'Included [P][R] Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid syndrome.', 'Included [P] Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid 
Syndrome.'] + recommendations: + ohdsi:632 | [P][R] Antiphospholipid syndrome | This phenotype represents all events of Antiphospholipid syndrome, aligning with the study intent of patients diagnosed with this condition in the outpatient setting. + ohdsi:781 | [P] Antiphospholipid syndrome | This phenotype represents the first occurrence of Antiphospholipid Syndrome, which is a comorbidity and covariate of interest in this patient population. + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + intent_facets_raw: {'condition_or_topic': 'ADRD or late-stage dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'ADRD or late-stage dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + planning_shortlist: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + planning_reasoning: ['Selected shortlisted candidates align with ADRD or late-stage dementia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Dementia as a diagnosis candidate focused on Dementia.', 'Included [P][R] Dementia as a diagnosis candidate focused on Dementia.', 'Included Dementia Alzheimer Vascular Mixed Nonspecific (Hoile) as a diagnosis candidate focused on Dementia.'] + recommendations: + ohdsi:864 | [P] Earliest event of Dementia | This phenotype identifies the diagnosis of dementia in patients 18 years or older, suitable for the study 
intent. + ohdsi:651 | [P][R] Dementia | This phenotype represents Dementia, a condition primarily used for diagnosis, aligning with the study intent of identifying older adults with ADRD or late-stage dementia. + cipher:31120 | Dementia Alzheimer Vascular Mixed Nonspecific (Hoile) | This phenotype identifies individuals with dementia, including Alzheimer's, vascular, or mixed dementia based on Read codes, aligning with the study intent. + +CASE 66: patients who experienced a GI bleed adverse event + intent_facets_raw: {'condition_or_topic': 'GI bleed', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'adverse event', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'GI bleed', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'adverse event', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:482'] + planning_reasoning: ['Selected shortlisted candidates align with GI bleed as a outcome-oriented study intent.', 'Included [P][R] Gastrointestinal hemorrhage as a outcome candidate focused on Gastrointestinal hemorrhage.'] + recommendations: + ohdsi:482 | [P][R] Gastrointestinal hemorrhage | This phenotype represents the event of Gastrointestinal hemorrhage, an adverse patient outcome, directly aligning with the study intent of patients experiencing a GI bleed adverse event. 
+ +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient setting'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient setting'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:678', 'cipher:31308'] + planning_reasoning: ['Selected shortlisted candidates align with COVID-19 as a diagnosis-oriented study intent.', 'Included [P][R] COVID-19 as a diagnosis candidate focused on COVID-19.', 'Included Confirmed COVID-19 Diagnoses (Knight) as a diagnosis candidate focused on Confirmed COVID-19.'] + recommendations: + ohdsi:678 | [P][R] COVID-19 | This phenotype represents the diagnosis of COVID-19, directly aligning with the study intent of patients who received a COVID-19 diagnosis in the outpatient setting. + cipher:31308 | Confirmed COVID-19 Diagnoses (Knight) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 68: veterans who experienced an abdominal aortic aneurysm + intent_facets_raw: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:29240', 'cipher:29169', 'cipher:15196'] + planning_reasoning: ['Selected shortlisted candidates align with abdominal aortic aneurysm as a diagnosis-oriented study intent.', 'Included Abdominal Aortic Aneurysm (PheKB) as a diagnosis candidate focused on Abdominal Aortic Aneurysm (AAA).', 'Included Abdominal Aortic Aneurysm (HDR UK) as a diagnosis candidate focused on Abdominal Aortic Aneurysm (AAA).', 'Included Abdominal aortic aneurysm (gwPheWAS) as a comorbidity covariate candidate focused on Abdominal Aortic Aneurysm.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:1093.'] + recommendations: + cipher:29240 | Abdominal Aortic Aneurysm (PheKB) | This phenotype directly addresses the study intent of identifying veterans with an abdominal aortic aneurysm, using a code-based approach. + cipher:29169 | Abdominal Aortic Aneurysm (HDR UK) | This phenotype provides an alternative code-based definition of abdominal aortic aneurysm, offering a second relevant recommendation. 
+ cipher:15196 | Abdominal aortic aneurysm (gwPheWAS) | This phenotype is based on Phecodes derived from ICD codes and was used in the Million Veteran Program, aligning with the veteran population focus. + +CASE 69: patients with COPD according to diagnostic codes in the EHR + intent_facets_raw: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + planning_reasoning: ['Selected shortlisted candidates align with COPD as a diagnosis-oriented study intent.', 'Included Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) as a diagnosis candidate focused on COPD.', 'Included Chronic Obstructive Pulmonary Disease, (VINCI) as a diagnosis candidate focused on COPD.', 'Included Chronic Obstructive Pulmonary Disease (HR UK) as a diagnosis candidate focused on Chronic Obstructive Pulmonary Disease (COPD).'] + recommendations: + cipher:29794 | Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) | This phenotype directly aligns with the study intent of identifying patients with COPD based on ICD codes and related diagnostic codes. + cipher:4241 | Chronic Obstructive Pulmonary Disease, (VINCI) | This phenotype represents a COPD diagnosis as defined by the VINCI dataset, relevant to the study's focus. 
+ cipher:29756 | Chronic Obstructive Pulmonary Disease (HR UK) | This phenotype provides a comprehensive diagnosis of COPD based on multiple coding systems, aligning with the study’s intention. + +CASE 70: patients hospitalized at least once for heart failure + intent_facets_raw: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['hospitalized'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': ['hospitalized'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + planning_reasoning: ['Selected shortlisted candidates align with heart failure as a diagnosis-oriented study intent.', 'Included [P] Heart failure2 as a diagnosis candidate focused on Heart Failure.', 'Included [P] Acute Heart failure from legend as a diagnosis candidate focused on Heart Failure.', 'Included Heart Failure (BOS CSPCC) as a diagnosis candidate focused on Heart Failure.'] + recommendations: + ohdsi:934 | [P] Heart failure2 | This phenotype represents the initial diagnosis of Heart Failure, aligning with the study intent of patients hospitalized with heart failure. + ohdsi:1303 | [P] Acute Heart failure from legend | This phenotype identifies the first recorded episode of heart failure in a patient, followed by at least one subsequent heart failure condition record, directly relevant to the study intent. 
+ cipher:16152 | Heart Failure (BOS CSPCC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + intent_facets_raw: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30803', 'cipher:31250', 'cipher:31195', 'cipher:30170'] + planning_reasoning: ['Selected shortlisted candidates align with diabetes as a medication based-oriented study intent.', 'Included Meglitinide (Kontopantelis) as a medication based candidate focused on Meglitinide Use in Type 2 Diabetes.', 'Included Diabetes and Diabates Medication (Knight) as a comorbidity covariate candidate focused on Diabetes.', 'Included Diabetes, Drug Code (Paige) as a comorbidity covariate candidate focused on Diabetes.'] + recommendations: + cipher:30803 | Meglitinide (Kontopantelis) | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + cipher:31250 | Diabetes and Diabates Medication (Knight) | This phenotype aligns with the study intent of identifying patients with diabetes based on medication use (ICD10 and SNOMED CT codes). 
+ cipher:31195 | Diabetes, Drug Code (Paige) | This phenotype, based on Read codes v2, represents diabetes as a comorbidity covariate, relevant to medication-based studies. + +CASE 1: Patients with an implanted cardiac defibrillator + cipher:15146 | 22.6125 | [('topic_primary', 'Cardiac Defibrillator')] + cipher:2288 | 15.6725 | [('topic_primary', 'Cardiac defibrillator in situ')] + cipher:13288 | 15.1325 | [('topic_primary', 'Cardiac Defibrillator in Situ')] + cipher:16289 | -5.3875 | [('topic_mismatch', 'Bleeding')] + cipher:30773 | -5.3875 | [('topic_mismatch', 'Trifascicular Block')] + cipher:30192 | -5.3875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:30617 | -12.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + ohdsi:1102 | -21.65 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 2: Patients diagnosed with fasciitis + cipher:15684 | 35.6925 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6525 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.69 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 3: Patients with acute prostatitis + ohdsi:283 | 34.35 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:3500 | 34.1125 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:15498 | 34.1125 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], 
"target_conditions": ["Prostatitis"]}')] + cipher:13720 | 28.9425 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:2054 | 28.9025 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:15499 | 15.1725 | [('topic_primary', 'Acute Prostatitis')] + cipher:18650 | 13.7554 | [('topic_primary', 'Chronic Prostatitis or Chronic Pelvic Pain Syndrome (MVP)')] + ohdsi:1301 | -5.23 | [('topic_mismatch', 'Urinary Tract Infection')] + +CASE 4: Patients who underwent esophagectomy + ohdsi:1097 | 32.1233 | [('topic_primary', 'Esophagectomy'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Esophagectomy"]}')] + ohdsi:1294 | 26.33 | [('topic_primary', 'Esophagectomy')] + ohdsi:870 | 26.31 | [('topic_primary', 'Esophagectomy')] + ohdsi:1309 | 22.35 | [('topic_primary', 'Esophagectomy')] + ohdsi:877 | -5.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + cipher:30087 | -9.3875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 5: Patients diagnosed with peripheral neuritis + ohdsi:388 | 35.85 | [('topic_primary', 'Peripheral neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral neuritis"]}')] + ohdsi:389 | 20.08 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathy"]}')] + ohdsi:238 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + ohdsi:540 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + cipher:30768 | 19.8825 | 
[('topic_primary', 'Peripheral Neuropathies'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathies"]}')] + ohdsi:236 | 19.6233 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic Peripheral Neuropathy"]}')] + cipher:2808 | 19.1125 | [('topic_primary', 'Peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hereditary and idiopathic peripheral neuropathy"]}')] + ohdsi:541 | 18.31 | [('topic_primary', 'Idiopathic peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic peripheral neuropathy"]}')] + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + ohdsi:760 | 0.35 | [] + ohdsi:757 | 0.33 | [] + ohdsi:1057 | 0.29 | [] + ohdsi:1042 | 0.27 | [] + ohdsi:1040 | 0.25 | [] + ohdsi:1058 | 0.25 | [] + ohdsi:1059 | 0.25 | [] + ohdsi:1066 | 0.25 | [] + +CASE 7: Patients with allergic rhinitis + ohdsi:508 | 35.85 | [('topic_primary', 'Allergic rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic rhinitis"]}')] + ohdsi:367 | 35.83 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:2081 | 35.6125 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:30258 | 30.1725 | [('topic_primary', 'Allergic and Chronic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic and chronic rhinitis diagnosis"]}')] + cipher:13411 | 28.1125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:12 | 23.2067 | [('topic_primary', 'Rhinitis'), ('topic_context', '{"context_conditions": ["Common cold", "Sinusitis", "Respiratory Symptoms"], "target_conditions": ["Rhinitis"]}')] + cipher:15246 | 
20.6125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:370 | 16.25 | [('topic_primary', 'Allergic Disorder')] + +CASE 8: Patients with ischemic heart disease + ohdsi:654 | 34.35 | [('topic_primary', 'Ischemic heart disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic heart disease"]}')] + cipher:16261 | 34.1525 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29560 | 34.0375 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29218 | 23.6325 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:30610 | 23.6125 | [('topic_primary', 'Ischaemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischaemic Heart Disease"]}')] + cipher:29772 | 23.6125 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:30617 | 11.1725 | [('topic_primary', 'Coronary Heart Disease')] + cipher:31868 | 9.7792 | [('topic_primary', 'Chronic Ischaemic Heart Disease')] + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + cipher:2798 | 26.6125 | [('topic_primary', 'Hemorrhage in early pregnancy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hemorrhage in early pregnancy"]}')] + cipher:2643 | 21.2125 | [('topic_primary', 'Early or threatened labor; hemorrhage in early pregnancy')] + cipher:13824 | 16.6925 | [('topic_primary', 'Early Labor Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Early Labor Hemorrhage"]}')] + cipher:13827 | 14.2554 | [('topic_primary', 'Hemorrhage in Early Pregnancy')] + cipher:15566 | 13.6125 | 
[('topic_primary', 'Pregnancy Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pregnancy Hemorrhage"]}')] + cipher:17376 | 7.3982 | [('topic_primary', 'Hemorrhage')] + ohdsi:1434 | 3.5957 | [('topic_primary', 'Pregnancy Loss')] + ohdsi:1431 | 3.5757 | [('topic_primary', 'Ectopic Pregnancy')] + +CASE 10: Patients who underwent lung resection + ohdsi:1268 | 28.6833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative Afib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:1308 | 24.5833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative AFib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:869 | 22.81 | [('topic_primary', 'Lung Resection')] + ohdsi:1293 | 22.79 | [('topic_primary', 'Lung Resection')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:877 | -9.25 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1106 | -9.25 | [('topic_mismatch', 'Surgery')] + +CASE 11: Patients with laryngitis + ohdsi:355 | 34.35 | [('topic_primary', 'Laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Laryngitis"]}')] + cipher:2360 | 28.8625 | [('topic_primary', 'Chronic laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic laryngitis"]}')] + cipher:15233 | 26.4458 | [('topic_primary', 'Acute laryngitis and tracheitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute laryngitis", "Tracheitis"]}')] + cipher:13398 | 26.2375 | [('topic_primary', 'Acute Laryngitis and Tracheitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Laryngitis and Tracheitis"]}')] + cipher:2046 | 20.6125 | [('topic_primary', 'Acute laryngitis and tracheitis')] + ohdsi:327 | -5.19 | [('topic_mismatch', 'Pharyngitis')] + 
ohdsi:9 | -5.21 | [('topic_mismatch', 'Sore throat')] + cipher:29206 | -5.3075 | [('topic_mismatch', 'Peritonsillar Abscess')] + +CASE 12: Patients with regional enteritis + cipher:3534 | 35.6925 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:775 | -3.75 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 13: Patients with renal sclerosis + cipher:13646 | 28.9625 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 26.6925 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 19.1325 | [('topic_primary', 'Renal Sclerosis')] + cipher:3541 | 19.1125 | [('topic_primary', 'Renal Sclerosis')] + ohdsi:1003 | 18.5 | [('topic_primary', 'Renal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal cancer"]}')] + ohdsi:481 | 16.75 | [('topic_primary', 'Renal failure syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal failure syndrome"]}')] + ohdsi:467 | 7.25 | [('topic_primary', 'Systemic sclerosis')] + cipher:31257 | 7.1125 | [('topic_primary', 'Renal disease')] + +CASE 14: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart 
Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + cipher:31280 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 15: Patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:229 | -3.75 | [('topic_mismatch', 'Progressive Multifocal Leukoencephalopathy (PML)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + +CASE 16: Patients with anorexia nervosa + ohdsi:1340 | 34.29 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:17187 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:2117 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:12990 | 
26.6125 | [('topic_primary', 'Anorexia Nervosa')] + ohdsi:1339 | 18.5 | [('topic_primary', 'Bulimia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Bulimia Nervosa"]}')] + cipher:30163 | 5.9425 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia and Bulimia Nervosa"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1341 | -5.25 | [('topic_mismatch', 'Eating Disorders')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 17: Patients with dizziness, vertigo, or motion sickness + cipher:3402 | 19.8625 | [('topic_primary', 'Vertigo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vertigo"]}')] + cipher:13215 | 18.8125 | [('topic_primary', 'Dizziness and Giddiness (Lightheadedness and Vertigo)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dizziness and Giddiness (Lightheadedness and Vertigo)"]}')] + ohdsi:893 | 16.29 | [('topic_primary', 'Vertigo')] + cipher:2623 | 14.7792 | [('topic_primary', 'Dizziness and giddiness (Light-headedness and vertigo)')] + ohdsi:244 | 12.35 | [('topic_primary', 'Dizziness')] + cipher:15084 | 6.7792 | [('topic_primary', 'Dizziness and giddiness')] + cipher:4387 | -3.8675 | [('topic_mismatch', 'Vestibular Disorders')] + ohdsi:891 | -7.69 | [('topic_mismatch', 'Nausea')] + +CASE 18: Patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 26.6725 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 19.1525 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.25 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + 
ohdsi:605 | -12.25 | [('topic_mismatch', 'Muscle pain')] + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + cipher:2915 | 21.1125 | [('topic_primary', 'Insulin Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Insulin Adverse Effects"]}')] + cipher:2064 | 18.4125 | [('topic_primary', 'Adrenal Cortical Steroids Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Adrenal Cortical Steroids Adverse Effects"]}')] + cipher:2123 | 18.3125 | [('topic_primary', 'Lipid-Lowering Drug Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Lipid-Lowering Drug Adverse Effects"]}')] + cipher:3573 | 16.1125 | [('topic_primary', 'Salicylates adverse effects')] + cipher:2125 | 16.1125 | [('topic_primary', 'Antirheumatics adverse effects')] + cipher:14303 | 14.8392 | [('topic_primary', 'Adrenal Steroid Adverse Effects')] + cipher:17565 | 14.0192 | [('topic_primary', 'Adverse Effects of Adrenal Steroids')] + cipher:3180 | 13.9792 | [('topic_primary', 'Opiate Use and Adverse Effects')] + +CASE 20: Patients with low blood pressure + cipher:13390 | 35.6125 | [('topic_primary', 'Hypotension'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypotension"]}')] + ohdsi:339 | 21.33 | [('topic_primary', 'Hypotension')] + ohdsi:890 | 21.31 | [('topic_primary', 'Hypotension')] + ohdsi:526 | 17.25 | [('topic_primary', 'Orthostatic hypotension')] + ohdsi:997 | 9.35 | [('topic_primary', 'Hypotension')] + ohdsi:954 | -3.75 | [('topic_mismatch', 'Syncope')] + cipher:4093 | -3.8875 | [('topic_mismatch', 'Peripheral Vascular Disease')] + ohdsi:445 | -10.75 | [('topic_mismatch', 'Hypoglycemia')] + +CASE 21: Patients with encephalopathy + ohdsi:194 | 34.33 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + cipher:2664 | 34.1125 | [('topic_primary', 'Encephalopathy'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:331 | 32.31 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:223 | 25.7867 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)'), ('topic_context', '{"context_conditions": ["Eclampsia", "Hypertensive encephalopathy"], "target_conditions": ["Posterior reversible encepha... [truncated 19 chars]')] + ohdsi:936 | -2.3929 | [('topic_context', '{"context_conditions": ["Hepatic necrosis", "Hepatic coma", "Hepatic encephalopathy", "Liver failure", "Liver injury"], ... [truncated 41 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1333 | -5.21 | [('topic_mismatch', 'Advanced Liver Disease')] + ohdsi:544 | -5.25 | [('topic_mismatch', 'Encephalitis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 22: Patients with birdshot chorioretinitis + ohdsi:1223 | 8.0167 | [('topic_context', '{"context_conditions": ["Uveitis"], "target_conditions": ["Birdshot chorioretinitis"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1226 | -3.73 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.75 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:755 | -3.75 | [('topic_mismatch', 'Uveitis')] + cipher:30185 | -3.8875 | [('topic_mismatch', 'Posterior Uveitis')] + cipher:13118 | -3.8875 | [('topic_mismatch', 'Chorioretinal Inflammations Scars')] + cipher:2341 | -3.8875 | [('topic_mismatch', 'Chorioretinal inflammations, scars, and other disorders of choroid')] + ohdsi:1225 | -10.69 | [('topic_mismatch', 'Uveitis')] + +CASE 23: Older adults with macular degeneration + cipher:30295 | 35.6725 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3006 | 35.6325 | [('topic_primary', 'Macular Degeneration'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3005 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:2505 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:16256 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:14995 | 35.6125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3007 | 32.1125 | [('topic_primary', 'Macular Degeneration, Wet'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration, Wet"]}')] + ohdsi:536 | 30.6 | [('topic_primary', 'Age related macular degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Age related macular degeneration"]}')] + +CASE 24: Patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), 
('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:901 | 21.75 | [('topic_primary', 'Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anemia"]}')] + cipher:17112 | 17.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + ohdsi:210 | 15.4167 | [('topic_primary', 'Hemolytic Anemia')] + +CASE 25: Patients with MSI-low rectal adenocarcinoma + ohdsi:836 | 26.25 | [('topic_primary', 'colorectal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["colorectal cancer"]}')] + ohdsi:823 | 26.25 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:819 | 20.5833 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": ["MSI-L", "MSI-indeterminate", "MSS", "pMMR"], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:831 | 19.5833 | [('topic_primary', 'Colorectal Cancer Treatment'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:843 | 18.85 | [('topic_primary', 'colorectal cancer')] + ohdsi:821 | 18.79 | [('topic_primary', 'Colorectal Cancer')] + ohdsi:812 | 18.77 | [('topic_primary', 'Colorectal Cancer')] + ohdsi:840 | 12.0833 | [('topic_primary', 'colorectal cancer treatment')] + +CASE 26: Patients with blistering skin lesions + ohdsi:652 | 13.875 | [('topic_primary', 'Vasculitis of the skin'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vasculitis of the skin"]}')] + ohdsi:376 | 6.6433 | [('topic_primary', 'Bleeding Skin')] + ohdsi:1168 | 5.54 | [('topic_primary', 'Skin Ulcer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Skin Ulcer"]}')] + ohdsi:414 | 5.25 | [('topic_primary', 'Skin Eruption Symptoms')] + ohdsi:948 | -3.73 | [('topic_mismatch', 'Rash')] + ohdsi:461 | -3.75 | [('topic_mismatch', 'Erythema multiforme')] 
+ ohdsi:462 | -3.75 | [('topic_mismatch', 'Lichen planus')] + cipher:4016 | -3.8875 | [('topic_mismatch', 'Desquamative Rash')] + +CASE 27: Patients with stomatitis or mucositis + cipher:3657 | 26.2575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 14.3792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 14.3392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 13.8592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 13.8192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 13.7792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -5.3875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -12.3875 | [('topic_mismatch', 'Open Wound')] + +CASE 28: Patients with neurofibromatosis type 1 + ohdsi:697 | 35.85 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:304 | 35.83 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:305 | 35.77 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:696 | 20.81 | [('topic_primary', 'Neurofibromatosis type 2')] + cipher:12649 | 20.1125 | [('topic_primary', 'Neurofibromatosis')] + ohdsi:698 | 20.04 | [('topic_primary', 'Neurofibromatosis syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis syndrome"]}')] + ohdsi:308 | 15.6667 | [('topic_primary', 'Neurofibromatosis'), ('topic_context', '{"context_conditions": ["MRI of Brain", "Ophthalmology Visits"], "target_conditions": ["Neurofibromatosis"]}')] + ohdsi:306 | 13.85 | 
[('topic_primary', 'Optic Pathway Glioma and Neurofibromatosis')] + +CASE 29: Patients with keloid scars + cipher:13930 | 28.1125 | [('topic_primary', 'Keloid Scar')] + cipher:15610 | 20.7125 | [('topic_primary', 'Keloid Scar')] + cipher:2950 | 20.6525 | [('topic_primary', 'Keloid Scar')] + cipher:18443 | 6.7125 | [('topic_primary', 'Severe Cutaneous Adverse Reaction (SCAR)')] + cipher:30650 | -11.3875 | [('topic_mismatch', 'Smoking Status')] + ohdsi:1168 | -14.69 | [('topic_mismatch', 'Skin Ulcer')] + ohdsi:1215 | -14.73 | [('topic_mismatch', 'Cancer')] + ohdsi:1102 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 30: Patients with acetaminophen exposure + ohdsi:1187 | 32.85 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1427 | 28.5 | [('topic_primary', 'Acamprosate Exposure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acamprosate Exposure"]}')] + ohdsi:1428 | 24.75 | [('topic_primary', 'Disulfiram Exposure')] + ohdsi:1158 | 20.83 | [('topic_primary', 'Aspirin Exposure')] + cipher:31254 | -15.3875 | [('topic_mismatch', 'Liver Disease')] + cipher:30616 | -15.3875 | [('topic_mismatch', 'Substance Misuse')] + ohdsi:1423 | -17.73 | [('topic_mismatch', 'Acute Intoxication')] + ohdsi:735 | -17.75 | [('topic_mismatch', 'Acute Liver Injury')] + +CASE 31: Patients exposed to rifamycin antibiotics + ohdsi:1206 | 19.4767 | [('topic_primary', 'Macrolide Drug Exposure')] + ohdsi:1211 | 0.85 | [('topic_mismatch', 'Rifamycins')] + ohdsi:1213 | 0.83 | [('topic_mismatch', 'Streptogramins')] + ohdsi:1202 | 0.77 | [('topic_mismatch', 'Carbapenems')] + ohdsi:1212 | 0.75 | [('topic_mismatch', 'Sulfonamides')] + ohdsi:1209 | 0.75 | [('topic_mismatch', 'Penicillins')] + ohdsi:1208 | 0.75 | [('topic_mismatch', 'Oxazolidinones')] + ohdsi:1207 | 0.75 | [('topic_mismatch', 'Antibiotics - Monobactams')] + +CASE 32: Patients with a joint or ligament sprain + cipher:14236 | 28.9425 | [('topic_primary', 'Joint Ligament Sprain'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Joint Ligament Sprain"]}')] + cipher:15814 | 15.1725 | [('topic_primary', 'Joint Ligament Sprain')] + ohdsi:363 | 13.27 | [('topic_primary', 'Joint stiffness'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Joint stiffness"]}')] + cipher:2944 | 11.2125 | [('topic_primary', 'Ligament sprain')] + cipher:3569 | 10.4875 | [('topic_primary', 'Rotator cuff (capsule) sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Rotator cuff (capsule) sprain"]}')] + ohdsi:452 | 3.75 | [('topic_primary', 'Joint pain')] + cipher:3072 | 1.7792 | [('topic_primary', 'Muscle-tendon sprain')] + cipher:15813 | 1.1125 | [('topic_primary', 'Rotator cuff (capsule) sprain')] + +CASE 33: Pregnant patients with miscarriage or stillbirth + cipher:3056 | 31.6525 | [('topic_primary', 'Miscarriage; Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:15565 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:13818 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + ohdsi:627 | 21.33 | [('topic_primary', 'Miscarriage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage"]}')] + ohdsi:1432 | 16.31 | [('topic_primary', 'Stillbirth')] + ohdsi:606 | 16.25 | [('topic_primary', 'Stillbirth')] + ohdsi:1434 | -7.65 | [('topic_mismatch', 'Pregnancy Loss')] + ohdsi:1431 | -7.73 | [('topic_mismatch', 'Ectopic Pregnancy')] + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + cipher:31819 | 11.4425 | [('topic_primary', 'Arterial Embolism, Upper Extremity'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial 
Embolism"]}')] + cipher:13354 | 11.4025 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:2142 | 11.3625 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:31817 | 9.6125 | [('topic_primary', 'Embolism or Thrombosis')] + cipher:15204 | 7.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:31293 | 7.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + ohdsi:1090 | 3.625 | [('topic_primary', 'Pulmonary Embolism'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pulmonary Embolism"]}')] + cipher:31820 | 3.2125 | [('topic_primary', 'Lower Extremity Vascular Disease')] + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + cipher:31223 | 35.6125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + cipher:30639 | 35.6125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1301 | 32.1389 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Cystitis", "Asymptomatic bacteriuria"], "target_conditions": ["Acute Urinary tract infections U... 
[truncated 12 chars]')] + ohdsi:1186 | 31.81 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:410 | 28.25 | [('topic_primary', 'Urinary Tract Infection')] + ohdsi:861 | 22.5 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Pyuria", "Bacteriuria", "Cystitis"], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1061 | -7.65 | [('topic_mismatch', 'Cephalosporin Exposure')] + ohdsi:1060 | -7.67 | [('topic_mismatch', 'Fluoroquinolone Use')] + +CASE 36: Patients hospitalized with preinfarction syndrome + ohdsi:939 | -1.4357 | [('topic_context', '{"context_conditions": ["Preinfarction Syndrome", "Emergency Room Visit", "Inpatient Visit"], "target_conditions": ["Hos... [truncated 15 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1081 | -1.73 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30728 | -1.8875 | [('topic_mismatch', 'Unstable Angina')] + cipher:29772 | -5.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:30101 | -5.3875 | [('topic_mismatch', 'Myocardial Infarction')] + ohdsi:263 | -8.75 | [('topic_mismatch', 'Unstable Angina and NSTEMI')] + ohdsi:262 | -8.75 | [('topic_mismatch', 'Unstable Angina')] + cipher:30617 | -12.8275 | [('topic_mismatch', 'Coronary Heart Disease')] + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + cipher:3412 | 5.0925 | [('topic_primary', 'Personal history of diseases of blood and blood-forming organs')] + cipher:17130 | 5.0125 | [('topic_primary', 'Personal History of Blood and Bloodforming Organ Diseases')] + cipher:18428 | -3.8675 | [('topic_mismatch', 'Pancytopenia')] + cipher:30246 | -3.8875 | [('topic_mismatch', 'Aplastic Anaemias')] + cipher:30138 | -3.8875 | [('topic_mismatch', 'Hyposplenism')] + cipher:30287 | -3.8875 | [('topic_mismatch', 'Myelodysplastic Syndromes')] + 
cipher:29220 | -3.8875 | [('topic_mismatch', 'Anemias, Other')] + cipher:30672 | -3.8875 | [('topic_mismatch', 'Thalassaemia Trait')] + +CASE 38: Patients with benign pancreatic conditions + cipher:16955 | 20.7125 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16954 | 20.6725 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16947 | 20.6325 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16952 | 19.2775 | [('topic_primary', 'Pancreatic Cancer'), ('topic_context', '{"context_conditions": ["PSC", "IBD"], "target_conditions": ["Pancreatic Cancer"]}')] + cipher:16953 | 8.6125 | [('topic_primary', 'Pancreatic Inflammation')] + ohdsi:496 | -3.75 | [('topic_mismatch', 'Abdominal Pain')] + cipher:30223 | -3.8875 | [('topic_mismatch', 'Benign Neoplasm of Stomach and Duodenum')] + cipher:30238 | -3.8875 | [('topic_mismatch', 'Pancreatitis')] + +CASE 39: Patients with primary localized osteoarthritis + cipher:3192 | 34.1725 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl 
Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 40: New users of dihydropyridine calcium channel blockers + ohdsi:1047 | 44.33 | [('topic_primary', 'dihydropyridine calcium channel blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["dihydropyridine calcium channel blockers"]}')] + ohdsi:1048 | 36.85 | [('topic_primary', 'dihydropyridine calcium channel blockers')] + ohdsi:1036 | 23.31 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1049 | 22.645 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Hypertension", "Essential Hypertension"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1052 | 22.54 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Acute Myocardial Infarction"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1046 | 4.75 | [('topic_mismatch', 'Thiazide diuretics')] + ohdsi:1035 | 2.75 | [('topic_mismatch', 'Thiazide diuretics')] + cipher:30608 | -11.8875 | [('topic_mismatch', 'Cardiovascular Risk Score')] + +CASE 41: Veteran patients with renal sclerosis + cipher:13646 | 30.3625 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 28.1725 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 21.7125 | [('topic_primary', 'Renal Sclerosis')] + cipher:15442 | 17.6125 | [('topic_primary', 'Nephritis, Nephrosis, Renal Sclerosis')] + cipher:18902 | 9.6125 | [('topic_primary', 'Renal Failure')] + cipher:31257 | 8.6125 | [('topic_primary', 'Renal disease')] + cipher:16003 | -3.8875 | [('topic_mismatch', 'Chronic Kidney Disease')] + ohdsi:964 | -11.17 | [('topic_mismatch', 'Chronic Kidney Disease')] + +CASE 
42: Veteran patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 26.6525 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 20.1925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1725 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.25 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + cipher:30630 | -12.8875 | [('topic_mismatch', 'Rheumatoid Arthritis')] + +CASE 43: Veteran patients with autoimmune hemolytic anemia + cipher:18441 | 35.6725 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.83 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:17112 | 18.6325 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + cipher:2178 | 14.7792 | [('topic_primary', 'Autoimmune hemolytic anemias (MAP)')] + cipher:3120 | 11.2792 | [('topic_primary', 'Non-autoimmune hemolytic anemias')] + +CASE 44: Veteran patients with cardiac complications + 
cipher:17258 | 22.1125 | [('topic_primary', 'Cardiac Complications')] + cipher:15819 | 18.1125 | [('topic_primary', 'Cardiac/Vascular Device Complications')] + ohdsi:1081 | -3.67 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30192 | -3.8875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:29218 | -3.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16294 | -9.8875 | [('topic_mismatch', 'Cardiovascular Disease Mortality')] + cipher:16278 | -11.3475 | [('topic_mismatch', 'VA Administrative Data')] + cipher:30617 | -11.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + +CASE 45: Patients diagnosed with fasciitis + cipher:15684 | 35.6925 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6525 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.69 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 46: Patients with stomatitis or mucositis + cipher:3657 | 27.7575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | 
[('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 47: Patients with Barretts esophagus + cipher:13531 | 19.9425 | [('topic_primary', "Barrett's Esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Esophagus"]}')] + cipher:2187 | 19.9225 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:15342 | 19.8625 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + ohdsi:447 | -3.73 | [('topic_mismatch', 'Esophagitis')] + ohdsi:525 | -3.75 | [('topic_mismatch', 'Gastroesophageal Reflux Disease')] + ohdsi:446 | -3.75 | [('topic_mismatch', 'Eosinophilic esophagitis')] + cipher:30228 | -3.7875 | [('topic_mismatch', "Barrett's Oesophagus")] + cipher:30087 | -3.8875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 48: Patients with regional enteritis + cipher:3534 | 35.6925 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:775 | -3.75 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 49: Patients with primary localized osteoarthritis + cipher:3192 | 34.1725 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 50: Patients with aortic valve disease + cipher:13226 | 28.1125 | [('topic_primary', 'Aortic Valve Disease')] + cipher:13231 | 25.1125 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aortic Valve Disorders"]}')] + cipher:30301 | 24.6958 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nonrheumatic aortic valve disorders"]}')] + cipher:17250 | 20.6125 | [('topic_primary', 'Aortic Valve Disease')] + cipher:2131 | 20.6125 | [('topic_primary', 'Aortic valve disease')] + ohdsi:1172 | 0.79 | [('topic_primary', 'Heart valve disorder')] + ohdsi:1103 | -0.9833 | [('topic_primary', 'Cardiac Valve Surgery'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Cardiac Valve Surgery"]}')] + ohdsi:876 | -4.17 | [('topic_primary', 'Cardiac Valve Surgery')] + +CASE 51: Patients with chronic periodontitis + cipher:3397 
| 35.7125 | [('topic_primary', 'Periodontitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Periodontitis"]}')] + cipher:13492 | 28.1925 | [('topic_primary', 'Periodontitis')] + cipher:2371 | 24.1725 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13494 | 24.1125 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13493 | 24.1125 | [('topic_primary', 'Acute Periodontitis')] + cipher:15315 | 20.6125 | [('topic_primary', 'Periodontitis')] + cipher:15317 | 16.6325 | [('topic_primary', 'Chronic Periodontitis')] + cipher:29206 | -3.8875 | [('topic_mismatch', 'Peritonsillar Abscess')] + +CASE 52: Patients with hypertensive chronic kidney disease + ohdsi:923 | 30.5 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + cipher:2846 | 28.1125 | [('topic_primary', 'Hypertensive Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertensive Chronic Kidney Disease"]}')] + cipher:13240 | 20.6125 | [('topic_primary', 'Hypertensive Chronic Kidney Disease')] + ohdsi:1191 | 19.08 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + ohdsi:964 | 16.85 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31686 | 16.6525 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31287 | 14.1325 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31697 | 7.8725 | [('topic_primary', 'Hypertensive Heart and Renal Disease')] + +CASE 53: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | 
[('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + cipher:31280 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 54: Patients with scleritis or episcleritis + cipher:30069 | 25.2125 | [('topic_primary', 'Scleritis and Episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and Episcleritis"]}')] + cipher:3581 | 25.1525 | [('topic_primary', 'Scleritis and episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and episcleritis"]}')] + cipher:13186 | 20.1125 | [('topic_primary', 'Scleritis and Episcleritis')] + cipher:15063 | 13.1125 | [('topic_primary', 'Scleritis and Episcleritis')] + ohdsi:1226 | -3.73 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.75 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:1223 | -3.75 | [('topic_mismatch', 'Uveitis')] + ohdsi:620 | -3.75 | [('topic_mismatch', 'Uveitis')] + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + cipher:12820 | 15.8325 | [('topic_primary', 'Carbohydrate Transport and Metabolism Disorders')] + cipher:12818 | 14.8058 | [('topic_primary', 'Disorders of Carbohydrate Transport and Metabolism')] + cipher:3256 | 14.3125 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:17097 | 14.2525 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:14837 | 14.2125 | [('topic_primary', 'Carbohydrate 
Transport Metabolism')] + cipher:12805 | 11.0125 | [('topic_primary', 'Amino Acid Transport and Metabolism')] + cipher:2597 | 9.6411 | [('topic_primary', 'Protein Plasma-Amino-Acid Transport and Metabolism')] + cipher:2573 | 8.4258 | [('topic_primary', 'Carbohydrate Metabolism Disorders')] + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + ohdsi:1187 | 27.35 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1158 | -0.69 | [('topic_mismatch', 'Aspirin Exposure')] + ohdsi:719 | -9.75 | [('topic_mismatch', 'Hepatic Injury')] + cipher:18919 | -13.3875 | [('topic_mismatch', 'Serious Adverse Events')] + ohdsi:735 | -15.71 | [('topic_mismatch', 'Acute Liver Injury')] + ohdsi:293 | -15.75 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:18446 | -15.8075 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:31254 | -16.8875 | [('topic_mismatch', 'Liver Disease')] + +CASE 57: Patients diagnosed with dyschromia and vitiligo + cipher:13900 | 35.6725 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + cipher:2628 | 35.6325 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + ohdsi:471 | 21.85 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:30727 | 21.6925 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:3835 | 21.6125 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:13901 | 19.6125 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:15590 | 18.1525 | [('topic_primary', 'Dyschromia and Vitiligo')] + cipher:13902 | 13.4458 | 
[('topic_primary', 'Other Dyschromia')] + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + ohdsi:716 | 28.125 | [('topic_primary', 'Acute Hepatic Injury'), ('topic_context', '{"context_conditions": ["Hepatic Failure"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:735 | 26.56 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:294 | 26.08 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure", "Viral Hepatitis", "Alcoholic Liver Disease"], "target_conditions": [... [truncated 22 chars]')] + ohdsi:293 | 25.25 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18447 | 25.2125 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18446 | 25.1525 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:719 | 22.1667 | [('topic_primary', 'Hepatic Injury'), ('topic_context', '{"context_conditions": ["Jaundice", "Liver Disease"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:736 | 19.52 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + cipher:3108 | 35.6725 | [('topic_primary', 'Nerve Plexus Lesions'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nerve Plexus Lesions"]}')] + cipher:13084 | 26.2125 | [('topic_primary', 'Nerve Plexus Lesions')] + cipher:13085 | 25.1125 | [('topic_primary', 'Nerve Root Lesions'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Nerve Root Lesions"]}')] + cipher:14974 | 20.6925 | [('topic_primary', 'Nerve Plexus Lesions')] + cipher:13083 | 18.0192 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + cipher:14975 | 13.1125 | [('topic_primary', 'Nerve Root Lesions')] + cipher:14160 | 12.1125 | [('topic_primary', 'Nonallopathic Lesions NEC')] + cipher:14973 | 10.4992 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + +CASE 60: patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:543 | -3.75 | [('topic_mismatch', 'Seizure')] + +CASE 61: patients with chronic ulcerative colitis + ohdsi:860 | 35.81 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + ohdsi:458 | 35.77 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + cipher:30724 | 35.6125 | [('topic_primary', 'Ulcerative Colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative Colitis"]}')] + cipher:3770 | 32.1125 | [('topic_primary', 'Ulcerative colitis (chronic)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis (chronic)"]}')] + ohdsi:201 | 31.455 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": ["Rectal hemorrhage", "Inflammatory bowel disease", "Complications"], 
"target_conditions": ["Ulce... [truncated 17 chars]')] + ohdsi:775 | 4.0278 | [('topic_context', '{"context_conditions": ["First IBD Occurrence", "Chronic Ulcerative Proctitis"], "target_conditions": ["Inflammatory Bow... [truncated 13 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1067 | 0.5833 | [('topic_context', '{"context_conditions": ["Ulcerative colitis", "Rectal hemorrhage"], "target_conditions": ["JAK inhibitors"]}'), ('context_without_primary', 'topic only matched context fields')] + cipher:4126 | -3.8875 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + +CASE 62: Veteran patients with developmental disorders that are pervasive + cipher:3415 | 32.1725 | [('topic_primary', 'Pervasive Developmental Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pervasive Developmental Disorders"]}')] + cipher:12996 | 25.4858 | [('topic_primary', 'Pervasive Developmental Disorders')] + cipher:17197 | 17.6325 | [('topic_primary', 'Developmental Delays and Disorders')] + cipher:17193 | 16.5458 | [('topic_primary', 'Pervasive Developmental Disorders')] + cipher:18933 | 15.7792 | [('topic_primary', 'Mental Health Disorders')] + cipher:17138 | 7.6125 | [('topic_primary', 'Other Persistent Mental Disorders')] + cipher:30605 | -3.8075 | [('topic_mismatch', 'Autism Spectrum')] + cipher:30166 | -3.8875 | [('topic_mismatch', 'Down Syndrome')] + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + ohdsi:510 | 34.29 | [('topic_primary', 'Acute myocardial infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + ohdsi:1081 | 34.27 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:18982 | 34.1925 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], 
"target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:31590 | 34.1125 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:31275 | 34.1125 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + ohdsi:881 | 29.0167 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + cipher:3998 | 27.1725 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + cipher:30748 | 27.1125 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + ohdsi:632 | 19.35 | [('topic_primary', 'Antiphospholipid syndrome')] + ohdsi:781 | 19.33 | [('topic_primary', 'Antiphospholipid Syndrome')] + ohdsi:738 | 2.75 | [('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}'), ('context_without_primary', 'topic only matched context fields')] + cipher:4725 | -5.3675 | [('topic_mismatch', 'Lung Cancer Diagnosis')] + cipher:4332 | -5.3875 | [('topic_mismatch', 'Colorectal Cancer')] + cipher:4723 | -5.3875 | [('topic_mismatch', 'Lung Cancer')] + cipher:15875 | -5.3875 | [('topic_mismatch', 'Prostate Cancer')] + cipher:16293 | -12.3475 | [('topic_mismatch', 'Stroke Rehospitalization')] + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + ohdsi:864 | 18.95 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:651 | 18.95 | [('topic_primary', 
'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31120 | 18.9125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31241 | 18.8125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:30112 | 18.8125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31846 | 18.8125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31141 | 18.8125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:33 | 15.45 | [('topic_primary', 'Dementia')] + +CASE 66: patients who experienced a GI bleed adverse event + ohdsi:482 | -7.65 | [('topic_mismatch', 'Gastrointestinal hemorrhage')] + ohdsi:349 | -7.69 | [('topic_mismatch', 'Gastrointestinal Bleeding')] + ohdsi:794 | -7.73 | [('topic_mismatch', 'Hemorrhage of digestive system')] + ohdsi:77 | -7.75 | [('topic_mismatch', 'Gastrointestinal Bleeding')] + ohdsi:888 | -7.75 | [('topic_mismatch', 'Gastrointestinal Bleeding')] + ohdsi:57 | -7.75 | [('topic_mismatch', 'Bleeding')] + ohdsi:582 | -7.75 | [('topic_mismatch', 'Bleeding')] + cipher:4285 | -7.8875 | [('topic_mismatch', 'Bleeding')] + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + ohdsi:678 | 34.35 | [('topic_primary', 'COVID-19'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COVID-19"]}')] + cipher:31308 | 30.5975 | [('topic_primary', 'Confirmed COVID-19'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Confirmed COVID-19"]}')] + ohdsi:47 | 18.75 | [('topic_primary', 'COVID-19 diagnosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COVID-19 diagnosis"]}')] 
+ ohdsi:59 | 17.9967 | [('topic_primary', 'COVID-19 Diagnosis'), ('topic_context', '{"context_conditions": ["SARS-CoV-2 test"], "target_conditions": ["COVID-19 diagnosis"]}')] + ohdsi:44 | 17.1233 | [('topic_primary', 'COVID-19 Infection')] + cipher:16189 | 16.9458 | [('topic_primary', 'COVID-19 Severity')] + cipher:29120 | 7.6125 | [('topic_primary', 'Long COVID')] + ohdsi:346 | -1.75 | [('topic_mismatch', 'Outpatient Visit')] + +CASE 68: veterans who experienced an abdominal aortic aneurysm + cipher:29240 | 33.6125 | [('topic_primary', 'Abdominal Aortic Aneurysm (AAA)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Abdominal Aortic Aneurysm"]}')] + cipher:29169 | 26.1325 | [('topic_primary', 'Abdominal Aortic Aneurysm (AAA)')] + cipher:15196 | 21.6125 | [('topic_primary', 'Abdominal Aortic Aneurysm')] + ohdsi:1093 | 12.0 | [('topic_primary', 'Abdominal Aortic Aneurysm Repair'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Abdominal Aortic Aneurysm Repair"]}')] + ohdsi:866 | -2.8167 | [('topic_primary', 'Aortic Repair')] + ohdsi:1290 | -2.8367 | [('topic_primary', 'Aortic Repair')] + ohdsi:1314 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:1291 | -20.25 | [('topic_mismatch', 'Bypass Surgery')] + +CASE 69: patients with COPD according to diagnostic codes in the EHR + cipher:29794 | 35.7125 | [('topic_primary', 'COPD'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COPD"]}')] + cipher:4241 | 35.6125 | [('topic_primary', 'COPD'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COPD"]}')] + cipher:16798 | 29.6125 | [('topic_primary', 'COPD Exacerbations'), ('topic_context', '{"context_conditions": ["Acute Respiratory Failure"], "target_conditions": ["COPD Exacerbations"]}')] + cipher:29756 | 21.7925 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:31297 | 21.7525 | [('topic_primary', 'Chronic 
Obstructive Pulmonary Disease (COPD)')] + ohdsi:1192 | 17.87 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:16274 | 14.2125 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:29553 | -3.8875 | [('topic_mismatch', 'Sleep Apnea')] + +CASE 70: patients hospitalized at least once for heart failure + cipher:16152 | 37.6125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:934 | 36.9767 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": ["Hypertension"], "target_conditions": ["Heart Failure"]}')] + ohdsi:1303 | 34.33 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:30106 | 34.1125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:16291 | 30.1325 | [('topic_primary', 'Heart Failure Rehospitalization'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure Rehospitalization"]}')] + ohdsi:979 | 27.35 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:938 | 26.75 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:68 | 25.75 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + cipher:30803 | 30.2725 | [('topic_primary', 'Meglitinide Use in Type 2 Diabetes')] + cipher:31250 | 25.1925 | [('topic_primary', 'Diabetes')] + cipher:31195 | 25.1125 | [('topic_primary', 'Diabetes')] + cipher:30170 | 24.9625 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Type 1 Diabetes", "Type 2 Diabetes"]}')] + cipher:16207 | 24.8825 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Diabetes Mellitus"]}')] + cipher:16346 | 23.1125 | [('topic_primary', 'Type II Diabetes'), ('topic_context', '{"context_conditions": ["Diabetes Diagnosis"], "target_conditions": ["Type 2 Diabetes"]}')] + cipher:3761 | 19.3375 | [('topic_primary', 'Type 1 diabetes with renal manifestations'), ('topic_context', '{"context_conditions": ["renal manifestations"], "target_conditions": ["Type 1 diabetes"]}')] + cipher:16277 | 16.6125 | [('topic_primary', 'Diabetes')] + diff --git a/docs/evaluation/phenotype_recommendations/testing-weighted_0.8_2.0.txt b/docs/evaluation/phenotype_recommendations/testing-weighted_0.8_2.0.txt new file mode 100644 index 0000000..a18ceb0 --- /dev/null +++ b/docs/evaluation/phenotype_recommendations/testing-weighted_0.8_2.0.txt @@ -0,0 +1,2322 @@ +INFO: creating output file /tmp/phenotype_recommendation_tests.json +INFO: Cardiac defibrillator in situ (MAP) +INFO: Fasciitis (gwPheWAS) +INFO: Acute prostatitis (MAP) +INFO: [P] Esophagectomy +INFO: [P][R] Peripheral neuritis +INFO: [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap +INFO: [P][R] Allergic rhinitis +INFO: Ischemic Heart Disease (Sandhu) +INFO: Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) +INFO: [P] Lung Resection +INFO: [P] Laryngitis +INFO: Regional Enteritis (Phecode) +INFO: Renal Sclerosis NOS (VADC) +INFO: Other cardiomyopathy (MAP) +INFO: [P] Posterior reversible encephalopathy syndrome PRES +INFO: [P] Anorexia Nervosa +INFO: [P] Dizziness or giddiness including motion sickness and vertigo +INFO: Polymyalgia Rheumatica (VADC) +INFO: Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) +INFO: [P][R] Low blood pressure +INFO: [P] Encephalopathy +INFO: [P] Birdshot chorioretinitis 
+INFO: Macular Degeneration (Senile) of Retina Nos (Phecode) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: [P] Primary adenocarcinoma of rectum MSI-L +INFO: Blister (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Neurofibromatosis type 1 (FP) +INFO: Keloid scar (gwPheWAS) +INFO: [P] acetaminophen exposure 10 +INFO: [P] Antibiotics Rifamycins 10 +INFO: Joint/ligament sprain (gwPheWAS) +INFO: Miscarriage; stillbirth (MAP) +INFO: Arterial embolism and thrombosis of lower extremity artery (MAP) +INFO: [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection +INFO: [P] Hospitalization with preinfarction syndrome +INFO: Personal history of diseases of blood and blood-forming organs (MAP) +INFO: Other Benign Pancreatic Conditions (Nguyen) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: [P] New users of dihydropyridine calcium channel blockers +INFO: Renal Sclerosis NOS (VADC) +INFO: Polymyalgia Rheumatica (VADC) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: Cardiac Complications Not Elsewhere Classified (VADC) +INFO: Fasciitis (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Barrett's esophagus (gwPheWAS) +INFO: Regional Enteritis (Phecode) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: Aortic Valve Disease (Phecode) +INFO: Chronic Periodontitis (Phecode) +INFO: Hypertensive chronic kidney disease (MAP) +INFO: Other cardiomyopathy (MAP) +INFO: Scleritis and episcleritis (MAP) +INFO: Other disorders of carbohydrate transport and metabolism (MAP) +INFO: [P] acetaminophen exposure 10 +INFO: Dyschromia and Vitiligo +INFO: Acute Hepatic Injury with no pre-existing liver disease +INFO: Nerve Plexus Lesions +INFO: Posterior reversible encephalopathy syndrome PRES +INFO: Ulcerative colitis (chronic) +INFO: Pervasive Developmental Disorders +INFO: Acute myocardial infarction +INFO: Antiphospholipid syndrome +INFO: dementia in older adults +INFO: GI bleeding adverse event outcome +INFO: running COVID outpatient 
diagnosis cohort +INFO: running abdominal aortic aneurysm in veterans +INFO: COPD phenotype using diagnosis codes +INFO: heart failure hospitalization cohort +INFO: diabetes medication-based phenotype +INFO: Tests completed. File written. +RESULTS SUMMARY: +count 71 +CASE 1: Patients with an implanted cardiac defibrillator + shortlist: ['cipher:13288', 'cipher:2288'] + rec_ids: ['cipher:13288', 'cipher:2288'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13288', 'cipher:2288'] + final_deterministic: {'selected_ids': ['cipher:13288', 'cipher:2288'], 'matched_llm_ids': ['cipher:13288'], 'defaulted_ids': ['cipher:2288'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 2: Patients diagnosed with fasciitis + shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + rec_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + final_deterministic: {'selected_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'matched_llm_ids': ['cipher:14029', 'cipher:15684', 'cipher:2703'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 3: Patients with acute prostatitis + shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:2054'] + rec_ids: ['ohdsi:283', 'cipher:13720', 'cipher:2054'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:283', 'cipher:13720', 'cipher:2054'] + final_deterministic: {'selected_ids': 
['ohdsi:283', 'cipher:13720', 'cipher:2054'], 'matched_llm_ids': ['ohdsi:283', 'cipher:13720', 'cipher:2054'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 4: Patients who underwent esophagectomy + shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + rec_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + final_deterministic: {'selected_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870'], 'matched_llm_ids': ['ohdsi:1097'], 'defaulted_ids': ['ohdsi:1294', 'ohdsi:870'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 5: Patients diagnosed with peripheral neuritis + shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + rec_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: ['ohdsi:540'] + dedupe_backfilled_ids: [] + dedupe_applied: True + enforced_shortlist_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + final_deterministic: {'selected_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'matched_llm_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + rec_ids: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + 
enforced_shortlist_ids: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + final_deterministic: {'selected_ids': ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'], 'matched_llm_ids': ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 7: Patients with allergic rhinitis + shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + rec_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + final_deterministic: {'selected_ids': ['ohdsi:508', 'ohdsi:367', 'cipher:2081'], 'matched_llm_ids': ['ohdsi:508', 'ohdsi:367'], 'defaulted_ids': ['cipher:2081'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 8: Patients with ischemic heart disease + shortlist: ['cipher:16261', 'cipher:29560', 'cipher:29218', 'ohdsi:654'] + rec_ids: ['cipher:16261', 'cipher:29560', 'cipher:29218'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:16261', 'cipher:29560', 'cipher:29218', 'ohdsi:654'] + final_deterministic: {'selected_ids': ['cipher:16261', 'cipher:29560', 'cipher:29218'], 'matched_llm_ids': ['cipher:16261', 'cipher:29560', 'cipher:29218'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + shortlist: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + rec_ids: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + replaced_ids: [] + blocked_pool_ids: [] + 
blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + final_deterministic: {'selected_ids': ['cipher:2643', 'cipher:17376', 'cipher:2798'], 'matched_llm_ids': ['cipher:2643', 'cipher:17376', 'cipher:2798'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 10: Patients who underwent lung resection + shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + rec_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + final_deterministic: {'selected_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'matched_llm_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 11: Patients with laryngitis + shortlist: ['ohdsi:355', 'cipher:2046'] + rec_ids: ['ohdsi:355', 'cipher:2046'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:355', 'cipher:2046'] + final_deterministic: {'selected_ids': ['ohdsi:355', 'cipher:2046'], 'matched_llm_ids': ['ohdsi:355'], 'defaulted_ids': ['cipher:2046'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 12: Patients with regional enteritis + shortlist: ['cipher:13571', 'cipher:3534'] + rec_ids: ['cipher:13571', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + 
dedupe_applied: False + enforced_shortlist_ids: ['cipher:13571', 'cipher:3534'] + final_deterministic: {'selected_ids': ['cipher:13571', 'cipher:3534'], 'matched_llm_ids': ['cipher:13571', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 13: Patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'cipher:17322'] + rec_ids: ['cipher:13646', 'cipher:13656', 'ohdsi:1003'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'cipher:17322'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'ohdsi:1003'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'ohdsi:1003'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 14: Patients with cardiomyopathy + shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + rec_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + final_deterministic: {'selected_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30192'], 'matched_llm_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30192'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 15: Patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + 
enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 16: Patients with anorexia nervosa + shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + rec_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + final_deterministic: {'selected_ids': ['ohdsi:1340', 'cipher:17187', 'cipher:2117'], 'matched_llm_ids': ['ohdsi:1340'], 'defaulted_ids': ['cipher:17187', 'cipher:2117'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 17: Patients with dizziness, vertigo, or motion sickness + shortlist: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + rec_ids: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + final_deterministic: {'selected_ids': ['cipher:13215', 'ohdsi:893', 'cipher:3402'], 'matched_llm_ids': ['cipher:13215', 'ohdsi:893'], 'defaulted_ids': ['cipher:3402'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 18: Patients with polymyalgia rheumatica + shortlist: ['ohdsi:670', 'cipher:30277', 'cipher:13992'] + rec_ids: ['ohdsi:670', 'cipher:30277', 'cipher:13992'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:670', 
'cipher:30277', 'cipher:13992'] + final_deterministic: {'selected_ids': ['ohdsi:670', 'cipher:30277', 'cipher:13992'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992'], 'defaulted_ids': ['ohdsi:670'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + shortlist: ['cipher:2064', 'cipher:2123', 'cipher:2125'] + rec_ids: ['cipher:2064', 'cipher:2123', 'cipher:2125'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2064', 'cipher:2123', 'cipher:2125'] + final_deterministic: {'selected_ids': ['cipher:2064', 'cipher:2123', 'cipher:2125'], 'matched_llm_ids': ['cipher:2064'], 'defaulted_ids': ['cipher:2123', 'cipher:2125'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 20: Patients with low blood pressure + shortlist: ['ohdsi:890', 'ohdsi:526', 'cipher:13390'] + rec_ids: ['ohdsi:890', 'ohdsi:526', 'cipher:13390'] + replaced_ids: ['ohdsi:997'] + blocked_pool_ids: ['ohdsi:997'] + blocked_candidate_reasons: {'ohdsi:997': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:890', 'ohdsi:526', 'cipher:13390'] + final_deterministic: {'selected_ids': ['ohdsi:890', 'ohdsi:526', 'cipher:13390'], 'matched_llm_ids': ['ohdsi:890', 'ohdsi:526', 'cipher:13390'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 21: Patients with encephalopathy + shortlist: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + rec_ids: ['ohdsi:194', 'cipher:2664', 'ohdsi:331'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + 
duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:194', 'cipher:2664', 'ohdsi:331'], 'matched_llm_ids': ['ohdsi:194', 'cipher:2664', 'ohdsi:331'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 22: Patients with birdshot chorioretinitis + shortlist: ['ohdsi:1223'] + rec_ids: ['ohdsi:1223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1223'] + final_deterministic: {'selected_ids': ['ohdsi:1223'], 'matched_llm_ids': ['ohdsi:1223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 23: Older adults with macular degeneration + shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + rec_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + final_deterministic: {'selected_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'matched_llm_ids': ['cipher:30295', 'cipher:3006', 'cipher:3005'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 24: Patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + 
dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 25: Patients with MSI-low rectal adenocarcinoma + shortlist: [] + rec_ids: [] + replaced_ids: ['ohdsi:845', 'ohdsi:844', 'ohdsi:812'] + blocked_pool_ids: ['ohdsi:845', 'ohdsi:844', 'ohdsi:812', 'ohdsi:843', 'ohdsi:821'] + blocked_candidate_reasons: {'ohdsi:845': 'procedure_for_diagnosis_intent', 'ohdsi:844': 'procedure_for_diagnosis_intent', 'ohdsi:812': 'procedure_for_diagnosis_intent', 'ohdsi:843': 'procedure_for_diagnosis_intent', 'ohdsi:821': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: [] + final_deterministic: {'selected_ids': [], 'matched_llm_ids': [], 'defaulted_ids': [], 'invalid_llm_ids': ['ohdsi:2773', 'ohdsi:33', 'ohdsi:375'], 'duplicate_llm_ids': [], 'used_llm_justification_count': 0, 'used_default_justification_count': 0} + +CASE 26: Patients with blistering skin lesions + shortlist: ['ohdsi:652', 'ohdsi:376'] + rec_ids: ['ohdsi:652', 'ohdsi:376'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:652', 'ohdsi:376'] + final_deterministic: {'selected_ids': ['ohdsi:652', 'ohdsi:376'], 'matched_llm_ids': ['ohdsi:652', 'ohdsi:376'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 27: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + 
replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 28: Patients with neurofibromatosis type 1 + shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + rec_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + final_deterministic: {'selected_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'matched_llm_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 29: Patients with keloid scars + shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + rec_ids: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + final_deterministic: {'selected_ids': ['cipher:13930', 'cipher:15610', 'cipher:2950'], 'matched_llm_ids': ['cipher:13930', 'cipher:15610'], 'defaulted_ids': ['cipher:2950'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 30: Patients with acetaminophen exposure + shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1158'] + 
rec_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1158'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1158'] + final_deterministic: {'selected_ids': ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1158'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': ['ohdsi:1427', 'ohdsi:1158'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 31: Patients exposed to rifamycin antibiotics + shortlist: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + rec_ids: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + final_deterministic: {'selected_ids': ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'], 'matched_llm_ids': ['ohdsi:1211'], 'defaulted_ids': ['ohdsi:1207', 'ohdsi:1203'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 32: Patients with a joint or ligament sprain + shortlist: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + rec_ids: ['cipher:14236', 'cipher:3569', 'cipher:2944'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + final_deterministic: {'selected_ids': ['cipher:14236', 'cipher:3569', 'cipher:2944'], 'matched_llm_ids': ['cipher:14236'], 'defaulted_ids': ['cipher:3569', 'cipher:2944'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 33: Pregnant patients with 
miscarriage or stillbirth + shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + rec_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + final_deterministic: {'selected_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'matched_llm_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + shortlist: ['cipher:13354', 'cipher:2142'] + rec_ids: ['cipher:13354', 'cipher:2142'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13354', 'cipher:2142'] + final_deterministic: {'selected_ids': ['cipher:13354', 'cipher:2142'], 'matched_llm_ids': ['cipher:13354'], 'defaulted_ids': ['cipher:2142'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + shortlist: ['ohdsi:1301', 'ohdsi:1186', 'cipher:31223'] + rec_ids: ['ohdsi:1301', 'ohdsi:1186', 'cipher:31223'] + replaced_ids: [] + blocked_pool_ids: ['ohdsi:861'] + blocked_candidate_reasons: {'ohdsi:861': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1301', 'ohdsi:1186', 'cipher:31223'] + final_deterministic: {'selected_ids': ['ohdsi:1301', 'ohdsi:1186', 'cipher:31223'], 'matched_llm_ids': ['ohdsi:1301', 'ohdsi:1186'], 'defaulted_ids': ['cipher:31223'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 
'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 36: Patients hospitalized with preinfarction syndrome + shortlist: ['ohdsi:939'] + rec_ids: ['ohdsi:939'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:939'] + final_deterministic: {'selected_ids': ['ohdsi:939'], 'matched_llm_ids': ['ohdsi:939'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + shortlist: ['ohdsi:738', 'cipher:3412'] + rec_ids: ['ohdsi:738', 'cipher:3412'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:738', 'cipher:3412'] + final_deterministic: {'selected_ids': ['ohdsi:738', 'cipher:3412'], 'matched_llm_ids': ['ohdsi:738'], 'defaulted_ids': ['cipher:3412'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 38: Patients with benign pancreatic conditions + shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + rec_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + final_deterministic: {'selected_ids': ['cipher:16954', 'cipher:16952', 'cipher:16953'], 'matched_llm_ids': ['cipher:16952', 'cipher:16953'], 'defaulted_ids': ['cipher:16954'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 39: 
Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:4029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 40: New users of dihydropyridine calcium channel blockers + shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + rec_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + final_deterministic: {'selected_ids': ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052'], 'matched_llm_ids': ['ohdsi:1047', 'ohdsi:1048'], 'defaulted_ids': ['ohdsi:1052'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 41: Veteran patients with renal sclerosis + shortlist: ['cipher:13656', 'cipher:17322', 'cipher:18902', 'cipher:31257'] + rec_ids: ['cipher:13656', 'cipher:17322', 'cipher:18902'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13656', 'cipher:17322', 'cipher:18902', 'cipher:31257'] + final_deterministic: {'selected_ids': ['cipher:13656', 'cipher:17322', 'cipher:18902'], 'matched_llm_ids': ['cipher:13656', 'cipher:18902'], 
'defaulted_ids': ['cipher:17322'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 42: Veteran patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + rec_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'matched_llm_ids': ['cipher:30277', 'cipher:13992', 'cipher:17453'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 43: Veteran patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:738', 'cipher:18441'], 'defaulted_ids': ['ohdsi:1018'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 44: Veteran patients with cardiac complications + shortlist: ['ohdsi:1081'] + rec_ids: ['ohdsi:1081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1081'] + final_deterministic: {'selected_ids': ['ohdsi:1081'], 'matched_llm_ids': ['ohdsi:1081'], 'defaulted_ids': [], 'invalid_llm_ids': [], 
'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 45: Patients diagnosed with fasciitis + shortlist: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + rec_ids: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + final_deterministic: {'selected_ids': ['cipher:15684', 'cipher:2703', 'cipher:14029'], 'matched_llm_ids': ['cipher:15684', 'cipher:2703', 'cipher:14029'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 46: Patients with stomatitis or mucositis + shortlist: ['cipher:3657', 'cipher:17298', 'cipher:13516'] + rec_ids: ['cipher:3657', 'cipher:17298', 'cipher:13516'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3657', 'cipher:17298', 'cipher:13516'] + final_deterministic: {'selected_ids': ['cipher:3657', 'cipher:17298', 'cipher:13516'], 'matched_llm_ids': ['cipher:3657', 'cipher:17298', 'cipher:13516'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 47: Patients with Barretts esophagus + shortlist: ['cipher:13531', 'cipher:15342', 'cipher:2187'] + rec_ids: ['cipher:13531', 'cipher:15342', 'cipher:2187'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13531', 'cipher:15342', 'cipher:2187'] + final_deterministic: {'selected_ids': ['cipher:13531', 'cipher:15342', 'cipher:2187'], 'matched_llm_ids': 
['cipher:13531', 'cipher:15342'], 'defaulted_ids': ['cipher:2187'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 48: Patients with regional enteritis + shortlist: ['ohdsi:884', 'cipher:3534', 'cipher:13571'] + rec_ids: ['ohdsi:884', 'cipher:3534', 'cipher:13571'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:884', 'cipher:3534', 'cipher:13571'] + final_deterministic: {'selected_ids': ['ohdsi:884', 'cipher:3534', 'cipher:13571'], 'matched_llm_ids': ['ohdsi:884', 'cipher:3534', 'cipher:13571'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 49: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 50: Patients with aortic valve disease + shortlist: ['cipher:30301', 'cipher:17250', 'cipher:2131', 'cipher:31315', 'ohdsi:1172'] + rec_ids: ['cipher:30301', 'cipher:17250', 'cipher:2131'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30301', 'cipher:17250', 
'cipher:2131', 'cipher:31315', 'ohdsi:1172'] + final_deterministic: {'selected_ids': ['cipher:30301', 'cipher:17250', 'cipher:2131'], 'matched_llm_ids': ['cipher:30301', 'cipher:17250'], 'defaulted_ids': ['cipher:2131'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 51: Patients with chronic periodontitis + shortlist: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + rec_ids: ['cipher:13494', 'cipher:3397', 'cipher:15317'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + final_deterministic: {'selected_ids': ['cipher:13494', 'cipher:3397', 'cipher:15317'], 'matched_llm_ids': ['cipher:13494', 'cipher:15317'], 'defaulted_ids': ['cipher:3397'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 52: Patients with hypertensive chronic kidney disease + shortlist: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'ohdsi:964'] + rec_ids: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'ohdsi:964'] + final_deterministic: {'selected_ids': ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'], 'matched_llm_ids': ['ohdsi:923', 'cipher:2846', 'ohdsi:1191'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 53: Patients with cardiomyopathy + shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + rec_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679'] + replaced_ids: [] + blocked_pool_ids: [] + 
blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + final_deterministic: {'selected_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'matched_llm_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 54: Patients with scleritis or episcleritis + shortlist: ['cipher:30069', 'cipher:3581'] + rec_ids: ['cipher:30069', 'cipher:3581'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30069', 'cipher:3581'] + final_deterministic: {'selected_ids': ['cipher:30069', 'cipher:3581'], 'matched_llm_ids': ['cipher:30069', 'cipher:3581'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + rec_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + final_deterministic: {'selected_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'matched_llm_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + shortlist: ['ohdsi:1187'] + rec_ids: ['ohdsi:1187'] 
+ replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187'] + final_deterministic: {'selected_ids': ['ohdsi:1187'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 57: Patients diagnosed with dyschromia and vitiligo + shortlist: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + rec_ids: ['cipher:13900', 'cipher:2628', 'ohdsi:471'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + final_deterministic: {'selected_ids': ['cipher:13900', 'cipher:2628', 'ohdsi:471'], 'matched_llm_ids': ['cipher:13900', 'cipher:2628', 'ohdsi:471'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + shortlist: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + rec_ids: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + final_deterministic: {'selected_ids': ['ohdsi:735', 'ohdsi:294', 'ohdsi:293'], 'matched_llm_ids': ['ohdsi:294', 'ohdsi:293'], 'defaulted_ids': ['ohdsi:735'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + shortlist: 
['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + rec_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + final_deterministic: {'selected_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 'matched_llm_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 60: patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 61: patients with chronic ulcerative colitis + shortlist: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + rec_ids: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + final_deterministic: {'selected_ids': ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'], 'matched_llm_ids': ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 62: Veteran patients with developmental disorders that are pervasive + shortlist: ['cipher:12996', 
'cipher:17197', 'cipher:17193', 'cipher:3415'] + rec_ids: ['cipher:12996', 'cipher:17197', 'cipher:17193'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12996', 'cipher:17197', 'cipher:17193', 'cipher:3415'] + final_deterministic: {'selected_ids': ['cipher:12996', 'cipher:17197', 'cipher:17193'], 'matched_llm_ids': ['cipher:12996', 'cipher:17197', 'cipher:17193'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + rec_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + final_deterministic: {'selected_ids': ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'], 'matched_llm_ids': ['ohdsi:510', 'ohdsi:1081'], 'defaulted_ids': ['cipher:18982'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + shortlist: ['ohdsi:632', 'ohdsi:781'] + rec_ids: ['ohdsi:632', 'ohdsi:781'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:632', 'ohdsi:781'] + final_deterministic: {'selected_ids': ['ohdsi:632', 'ohdsi:781'], 'matched_llm_ids': ['ohdsi:632', 'ohdsi:781'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 
'used_default_justification_count': 0} + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + shortlist: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + rec_ids: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + final_deterministic: {'selected_ids': ['ohdsi:864', 'ohdsi:651', 'cipher:31120'], 'matched_llm_ids': ['ohdsi:864', 'ohdsi:651'], 'defaulted_ids': ['cipher:31120'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 66: patients who experienced a GI bleed adverse event + shortlist: ['ohdsi:888', 'ohdsi:417', 'ohdsi:349', 'ohdsi:77'] + rec_ids: ['ohdsi:888', 'ohdsi:417', 'ohdsi:349'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:888', 'ohdsi:417', 'ohdsi:349', 'ohdsi:77'] + final_deterministic: {'selected_ids': ['ohdsi:888', 'ohdsi:417', 'ohdsi:349'], 'matched_llm_ids': ['ohdsi:888', 'ohdsi:417', 'ohdsi:349'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + shortlist: ['ohdsi:678', 'cipher:31308'] + rec_ids: ['ohdsi:678', 'cipher:31308'] + replaced_ids: ['ohdsi:47'] + blocked_pool_ids: ['ohdsi:47', 'ohdsi:59'] + blocked_candidate_reasons: {'ohdsi:47': 'withdrawn', 'ohdsi:59': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:678', 'cipher:31308'] + final_deterministic: {'selected_ids': ['ohdsi:678', 'cipher:31308'], 'matched_llm_ids': ['ohdsi:678'], 
'defaulted_ids': ['cipher:31308'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 68: veterans who experienced an abdominal aortic aneurysm + shortlist: ['cipher:29240', 'cipher:29169'] + rec_ids: ['cipher:29240', 'cipher:29169'] + replaced_ids: ['ohdsi:1093', 'ohdsi:866', 'ohdsi:1290'] + blocked_pool_ids: ['ohdsi:1093', 'ohdsi:866', 'ohdsi:1290'] + blocked_candidate_reasons: {'ohdsi:1093': 'procedure_for_diagnosis_intent', 'ohdsi:866': 'procedure_for_diagnosis_intent', 'ohdsi:1290': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:29240', 'cipher:29169'] + final_deterministic: {'selected_ids': ['cipher:29240', 'cipher:29169'], 'matched_llm_ids': ['cipher:29240', 'cipher:29169'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 69: patients with COPD according to diagnostic codes in the EHR + shortlist: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + rec_ids: ['cipher:29794', 'cipher:4241', 'cipher:29756'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + final_deterministic: {'selected_ids': ['cipher:29794', 'cipher:4241', 'cipher:29756'], 'matched_llm_ids': ['cipher:29794', 'cipher:4241', 'cipher:29756'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 70: patients hospitalized at least once for heart failure + shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + rec_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + replaced_ids: [] + blocked_pool_ids: [] + 
blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + final_deterministic: {'selected_ids': ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'], 'matched_llm_ids': ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + shortlist: ['cipher:30803', 'cipher:31250', 'cipher:31195'] + rec_ids: ['cipher:30803', 'cipher:31250', 'cipher:31195'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30803', 'cipher:31250', 'cipher:31195'] + final_deterministic: {'selected_ids': ['cipher:30803', 'cipher:31250', 'cipher:31195'], 'matched_llm_ids': ['cipher:31250', 'cipher:31195'], 'defaulted_ids': ['cipher:30803'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 1: Patients with an implanted cardiac defibrillator + intent_facets_raw: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['device-related'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['cardiac patients']} + intent_facets_effective: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 
'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['device-related'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['cardiac patients']} + planning_shortlist: ['cipher:13288', 'cipher:2288'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac defibrillator as a diagnosis-oriented study intent.', 'Included Cardiac Defibrillator in Situ (Phecode) as a comorbidity covariate candidate focused on Cardiac Defibrillator in Situ.', 'Included Cardiac defibrillator in situ (MAP) as a outcome candidate focused on Cardiac defibrillator in situ.'] + recommendations: + cipher:13288 | Cardiac Defibrillator in Situ (Phecode) | This phenotype directly addresses the study intent of identifying patients with a cardiac defibrillator in situ, utilizing a well-established Phecode definition. + cipher:2288 | Cardiac defibrillator in situ (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned outcome match. 
+ +CASE 2: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:14029', 'cipher:15684', 'cipher:2703'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:14029 | Fasciitis (Phecode) | This phenotype is defined using the Phecode mapping system, representing a diagnosis of Fasciitis based on ICD codes. + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP). + cipher:2703 | Fasciitis (MAP) | This phenotype identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold. 
+ +CASE 3: Patients with acute prostatitis + intent_facets_raw: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['acute inflammation of the prostate'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['acute inflammation of the prostate'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:2054'] + planning_reasoning: ['Selected shortlisted candidates align with prostatitis as a diagnosis-oriented study intent.', 'Included [P] Prostatitis as a diagnosis candidate focused on Prostatitis.', 'Included Acute Prostatitis (Phecode) as a diagnosis candidate focused on Acute Prostatitis.', 'Included Acute prostatitis (MAP) as a diagnosis candidate focused on Acute Prostatitis.'] + recommendations: + ohdsi:283 | [P] Prostatitis | This phenotype directly addresses acute prostatitis, aligning with the study intent of patients with this condition. + cipher:13720 | Acute Prostatitis (Phecode) | The Phecode phenotype provides a clinically relevant diagnosis for acute prostatitis based on ICD codes, a suitable option given the study's focus. + cipher:2054 | Acute prostatitis (MAP) | The MAP phenotype offers a probabilistic approach to identifying acute prostatitis based on clustering of ICD codes, which may be useful for further investigation. 
+ +CASE 4: Patients who underwent esophagectomy + intent_facets_raw: {'condition_or_topic': 'esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital setting'], 'population_cues': ['patients undergoing surgical intervention']} + intent_facets_effective: {'condition_or_topic': 'esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital setting'], 'population_cues': ['patients undergoing surgical intervention']} + planning_shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + planning_reasoning: ['Selected shortlisted candidates align with esophagectomy as a procedure-oriented study intent.', 'Included [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults, inpt stay, no ED as a procedure candidate focused on Esophagectomy.'] + recommendations: + ohdsi:1097 | [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib | This phenotype directly addresses the study intent of patients who underwent esophagectomy and developed postoperative atrial fibrillation. + ohdsi:1294 | [P] Esophagectomy, adults | Selected from the top reranked shortlisted candidates as a clinically aligned procedure match. 
+ ohdsi:870 | [P] Esophagectomy, adults, inpt stay, no ED | Selected from the top reranked shortlisted candidates as a clinically aligned procedure match. + +CASE 5: Patients diagnosed with peripheral neuritis + intent_facets_raw: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + planning_reasoning: ['Selected shortlisted candidates align with peripheral neuritis as a diagnosis-oriented study intent.', 'Included [P][R] Peripheral neuritis as a diagnosis candidate focused on Peripheral neuritis.', 'Included [P] Peripheral Neuropathy or Neuritits as a diagnosis candidate focused on Peripheral Neuropathy.', 'Included [P][R] Optic neuritis as a diagnosis candidate focused on Optic neuritis.', 'Near-duplicate topical variants were removed to preserve distinct recommendation coverage: ohdsi:540.'] + recommendations: + ohdsi:388 | [P][R] Peripheral neuritis | This phenotype directly represents the diagnosis of peripheral neuritis, aligning with the study intent of patients diagnosed with peripheral neuritis. + ohdsi:389 | [P] Peripheral Neuropathy or Neuritits | This phenotype identifies the first occurrence of peripheral neuropathy or neuritis, providing a broader scope relevant to the study of peripheral neuritis. 
+ ohdsi:238 | [P][R] Optic neuritis | Optic neuritis is a specific form of peripheral neuritis, making it a relevant phenotype for patients diagnosed with peripheral neuritis. + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + intent_facets_raw: {'condition_or_topic': 'TNF-alpha inhibitors AND IL-12/23 inhibitors', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['concomitant exposure']} + intent_facets_effective: {'condition_or_topic': 'TNF-alpha inhibitors AND IL-12/23 inhibitors', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['concomitant exposure']} + planning_shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + planning_reasoning: ['Selected shortlisted candidates align with TNF-alpha inhibitors AND IL-12/23 inhibitors as a medication based-oriented study intent.', 'Included [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on IL-23 Inhibitors.', 'Included [P] New users of IL-23 inhibitors as a medication based candidate focused on IL-23 inhibitors.', 'Included [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on TNF-alpha Inhibitors, IL23 Inhibitors.'] + recommendations: + ohdsi:760 | [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap | 
This phenotype directly addresses the study's focus on concomitant IL-23 and IL-12/23 inhibitor usage. + ohdsi:1042 | [P] New users of IL-23 inhibitors | This phenotype identifies patients initiating IL-23 inhibitor exposure, which is relevant to assessing the temporal relationship with TNF-alpha inhibitor use. + ohdsi:759 | [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap | This phenotype captures concomitant use of TNF-alpha and IL23 inhibitors, aligning with the study intent of examining combined exposure. + +CASE 7: Patients with allergic rhinitis + intent_facets_raw: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'general', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Primary diagnosis related to allergic rhinitis'], 'care_setting_cues': ['Outpatient care for rhinitis'], 'population_cues': ['Adult or pediatric patients']} + intent_facets_effective: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'general', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Primary diagnosis related to allergic rhinitis'], 'care_setting_cues': ['Outpatient care for rhinitis'], 'population_cues': ['Adult or pediatric patients']} + planning_shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + planning_reasoning: ['Selected shortlisted candidates align with allergic rhinitis as a diagnosis-oriented study intent.', 'Included [P][R] Allergic rhinitis as a diagnosis candidate focused on Allergic rhinitis.', 'Included [P] Allergic Rhinitis as a diagnosis candidate focused on Allergic Rhinitis.', 'Included Allergic rhinitis (MAP) as a diagnosis candidate 
focused on Allergic Rhinitis.'] + recommendations: + ohdsi:508 | [P][R] Allergic rhinitis | This phenotype represents the diagnosis of Allergic rhinitis, primarily used for cohort definition, directly aligning with the study intent. + ohdsi:367 | [P] Allergic Rhinitis | This phenotype represents events of Allergic Rhinitis. Primarily intended for diagnosing this condition, providing a relevant diagnostic measure. + cipher:2081 | Allergic rhinitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 8: Patients with ischemic heart disease + intent_facets_raw: {'condition_or_topic': 'ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + planning_shortlist: ['cipher:16261', 'cipher:29560', 'cipher:29218', 'ohdsi:654'] + planning_reasoning: ['Selected shortlisted candidates align with ischemic heart disease as a diagnosis-oriented study intent.', 'Included Ischemic Heart Disease (Sandhu) as a diagnosis candidate focused on Ischemic Heart Disease.', 'Included Ischemic Heart Disease (HDR UK) as a diagnosis candidate focused on Ischemic Heart Disease.', 'Included Coronary Heart Disease (HDR UK) as a diagnosis candidate 
focused on Coronary Heart Disease.'] + recommendations: + cipher:16261 | Ischemic Heart Disease (Sandhu) | This phenotype directly addresses the study intent of identifying patients with ischemic heart disease based on ICD-10 codes. + cipher:29560 | Ischemic Heart Disease (HDR UK) | This phenotype also identifies patients with Ischemic Heart Disease, utilizing NLP techniques for a potentially broader and more comprehensive definition. + cipher:29218 | Coronary Heart Disease (HDR UK) | This phenotype is closely related to ischemic heart disease and could be relevant for analysis, focusing on the underlying coronary arteries. + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + intent_facets_raw: {'condition_or_topic': 'Hemorrhage, Threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hemorrhage', 'Threatened labor'], 'care_setting_cues': ['Pregnancy', 'Obstetrics'], 'population_cues': ['Pregnant women', 'Early pregnancy']} + intent_facets_effective: {'condition_or_topic': 'Hemorrhage, Threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hemorrhage', 'Threatened labor'], 'care_setting_cues': ['Pregnancy', 'Obstetrics'], 'population_cues': ['Pregnant women', 'Early pregnancy']} + planning_shortlist: ['cipher:2643', 'cipher:17376', 'cipher:2798'] + planning_reasoning: ['Selected shortlisted candidates align with Hemorrhage, Threatened labor as a diagnosis-oriented study intent.', 'Included Early or threatened labor; hemorrhage in early pregnancy (MAP) as a complication 
candidate focused on Early or threatened labor; hemorrhage in early pregnancy.', 'Included Hemorrhage in Early Pregnancy (VADC) as a outcome candidate focused on Hemorrhage.', 'Included Hemorrhage in early pregnancy (MAP) as a diagnosis candidate focused on Hemorrhage in early pregnancy.'] + recommendations: + cipher:2643 | Early or threatened labor; hemorrhage in early pregnancy (MAP) | This phenotype identifies early or threatened labor with hemorrhage in early pregnancy, a complication of pregnancy, directly related to the study intent. + cipher:17376 | Hemorrhage in Early Pregnancy (VADC) | This phenotype defines hemorrhage outcomes in early pregnancy, primarily based on ICD-9 and ICD-10 codes, aligning with the study intent of 'Pregnant patients with hemorrhage in early pregnancy or thr + cipher:2798 | Hemorrhage in early pregnancy (MAP) | This phenotype identifies patients with early pregnancy hemorrhage based on a MAP algorithm, aligning with the study's focus on hemorrhage in pregnancy. 
+ +CASE 10: Patients who underwent lung resection + intent_facets_raw: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['all care settings'], 'population_cues': ['patients undergoing surgery']} + intent_facets_effective: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['all care settings'], 'population_cues': ['patients undergoing surgery']} + planning_shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + planning_reasoning: ['Selected shortlisted candidates align with lung resection as a procedure-oriented study intent.', 'Included [P] Lung Resection, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, inpt stay, no ED as a procedure candidate focused on Lung Resection.'] + recommendations: + ohdsi:1268 | [P] Lung Resection, adults, inpt stay, no ED, post op new Afib | This phenotype directly matches the study intent of patients undergoing lung resection and specifically mentions postoperative atrial fibrillation, a relevant complication. + ohdsi:1308 | [P] Lung Resection, adults, post op new Afib | This phenotype aligns with the study intent by focusing on lung resection and post-operative atrial fibrillation. 
+ ohdsi:869 | [P] Lung Resection, adults, inpt stay, no ED | This phenotype represents lung resection as a surgical procedure, which is relevant to the study intent. + +CASE 11: Patients with laryngitis + intent_facets_raw: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Acute upper airway inflammation'], 'care_setting_cues': ['Outpatient clinic'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Acute upper airway inflammation'], 'care_setting_cues': ['Outpatient clinic'], 'population_cues': ['Patients']} + planning_shortlist: ['ohdsi:355', 'cipher:2046'] + planning_reasoning: ['Selected shortlisted candidates align with laryngitis as a diagnosis-oriented study intent.', 'Included [P] Laryngitis as a diagnosis candidate focused on Laryngitis.', 'Included Acute laryngitis and tracheitis (MAP) as a diagnosis candidate focused on Acute laryngitis and tracheitis.'] + recommendations: + ohdsi:355 | [P] Laryngitis | This phenotype directly represents the diagnosis of Laryngitis, aligning with the study intent of patients with laryngitis. + cipher:2046 | Acute laryngitis and tracheitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 12: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13571', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + cipher:13571 | Regional Enteritis (Phecode) | This phenotype aligns directly with the study intent of patients with regional enteritis, utilizing the Phecode grouping for ICD-9 and ICD-10 codes. + cipher:3534 | Regional enteritis (MAP) | This phenotype uses the MAP algorithm to identify patients with regional enteritis based on a probability cutoff, providing an alternative approach to diagnosis. 
+ +CASE 13: Patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'cipher:17322'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included [P] Renal cancer as a diagnosis candidate focused on Renal cancer.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This phenotype definition aligns directly with the study intent of patients with renal sclerosis, utilizing ICD-9 and ICD-10 codes for diagnosis. + cipher:13656 | Renal Sclerosis NOS (Phecode) | This phenotype represents another definition of renal sclerosis using ICD-9 and ICD-10 codes, providing a complementary representation of the condition. 
+ ohdsi:1003 | [P] Renal cancer | While primarily focused on renal cancer, this phenotype captures patients with a diagnosis related to kidney disease, which can be relevant within the broader context of renal sclerosis. + +CASE 14: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of heart condition'], 'care_setting_cues': ['hospitalized patients', 'clinic visits'], 'population_cues': ['cardiac patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of heart condition'], 'care_setting_cues': ['hospitalized patients', 'clinic visits'], 'population_cues': ['cardiac patients']} + planning_shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30192'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.'] + recommendations: + cipher:31252 | Cardiomyopathy (Knight) | The HDR UK phenotype based on ICD10, SNOMED CT and Read codes v2 specifically identifies patients with Cardiomyopathy, fitting the study's focus. 
+ ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype directly addresses Takotsubo cardiomyopathy, a specific type of cardiomyopathy, aligning with the study intent. + cipher:30192 | Other Cardiomyopathy (HDR UK) | This HDR UK phenotype captures patients with other forms of cardiomyopathy, broadening the scope to align with the study intent. + +CASE 15: Patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | This phenotype directly aligns with the study intent of 'Patients with a diagnosis of PRES'. 
+ +CASE 16: Patients with anorexia nervosa + intent_facets_raw: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient|ed|any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['general medical care', 'specialized eating disorder treatment'], 'population_cues': ['individuals']} + intent_facets_effective: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient|ed|any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['general medical care', 'specialized eating disorder treatment'], 'population_cues': ['individuals']} + planning_shortlist: ['ohdsi:1340', 'cipher:17187', 'cipher:2117'] + planning_reasoning: ['Selected shortlisted candidates align with anorexia nervosa as a diagnosis-oriented study intent.', 'Included [P] Anorexia Nervosa as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia Nervosa (VADC) as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia nervosa (MAP) as a diagnosis candidate focused on Anorexia Nervosa.'] + recommendations: + ohdsi:1340 | [P] Anorexia Nervosa | This phenotype represents the diagnosis of Anorexia Nervosa, defined as events lasting 30 days, collapsed within 1 year. It aligns directly with the study intent of patients with anorexia nervosa. + cipher:17187 | Anorexia Nervosa (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ cipher:2117 | Anorexia nervosa (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 17: Patients with dizziness, vertigo, or motion sickness + intent_facets_raw: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13215', 'ohdsi:893', 'cipher:3402'] + planning_reasoning: ['Selected shortlisted candidates align with dizziness, vertigo, motion sickness as a diagnosis-oriented study intent.', 'Included Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) as a diagnosis candidate focused on Dizziness and Giddiness (Lightheadedness and Vertigo).', 'Included [P] Vertigo as a diagnosis candidate focused on Vertigo.', 'Included Peripheral or central vertigo (MAP) as a diagnosis candidate focused on Vertigo.'] + recommendations: + cipher:13215 | Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) | This phenotype aligns with the study intent by representing a defined set of ICD-9 and ICD-10 codes related to dizziness and giddiness, primarily used for diagnostic purposes. + ohdsi:893 | [P] Vertigo | This phenotype directly reflects the study intent of 'Patients with dizziness, vertigo, or motion sickness' and represents a diagnosis of vertigo. 
+ cipher:3402 | Peripheral or central vertigo (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 18: Patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'elderly adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['rheumatic disease'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['elderly']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'elderly adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['rheumatic disease'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['elderly']} + planning_shortlist: ['ohdsi:670', 'cipher:30277', 'cipher:13992'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included [P][R] Temporal arteritis as a diagnosis candidate focused on Temporal arteritis.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.'] + recommendations: + ohdsi:670 | [P][R] Temporal arteritis | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype aligns directly with the study intent 'Patients with polymyalgia rheumatica' and is based on ICD10 codes, making it a suitable diagnostic indicator. 
+ cipher:13992 | Polymyalgia Rheumatica (Phecode) | This phenotype also represents Polymyalgia Rheumatica defined using ICD codes, providing an alternative diagnostic approach. + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + intent_facets_raw: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:2064', 'cipher:2123', 'cipher:2125'] + planning_reasoning: ['Selected shortlisted candidates align with corticosteroid adverse effects as a diagnosis-oriented study intent.', 'Included Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Adrenal Cortical Steroids Adverse Effects.', 'Included Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Lipid-Lowering Drug Adverse Effects.', 'Included Antirheumatics causing adverse effects in therapeutic use (MAP) as a comorbidity covariate candidate focused on Antirheumatics adverse effects.'] + recommendations: + cipher:2064 | Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) | This phenotype directly addresses the study intent of identifying patients with adverse effects from therapeutic corticosteroid 
use, as defined by a MAP phenotype based on ICD code utilization. The de + cipher:2123 | Antilipemic and antiarteriosclerotic drugs causing adverse effects in therapeutic use (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + cipher:2125 | Antirheumatics causing adverse effects in therapeutic use (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 20: Patients with low blood pressure + intent_facets_raw: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:890', 'ohdsi:526', 'cipher:13390'] + planning_reasoning: ['Selected shortlisted candidates align with Hypotension as a diagnosis-oriented study intent.', 'Included [P] Hypotension as a severity candidate focused on Hypotension.', 'Included [P][R] Orthostatic hypotension as a severity candidate focused on Orthostatic hypotension.', 'Included Hypotension Nos (Phecode) as a diagnosis candidate focused on Hypotension.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:997.'] + recommendations: + ohdsi:890 | [P] Hypotension | This phenotype directly relates to the study intent of 'Patients with low blood pressure' and represents a severity measure of hypotension. 
+ ohdsi:526 | [P][R] Orthostatic hypotension | This phenotype represents a specific type of hypotension, aligning with the study intent of low blood pressure. + cipher:13390 | Hypotension Nos (Phecode) | This phenotype defines hypotension based on ICD-9 and ICD-10 codes, providing a diagnostic measure of low blood pressure as requested. + +CASE 21: Patients with encephalopathy + intent_facets_raw: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of encephalopathy'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['patients with encephalopathy']} + intent_facets_effective: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['diagnosis of encephalopathy'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['patients with encephalopathy']} + planning_shortlist: ['ohdsi:194', 'cipher:2664', 'ohdsi:331', 'ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with encephalopathy as a diagnosis-oriented study intent.', 'Included [P] Encephalopathy or its presentations as a diagnosis candidate focused on Encephalopathy.', 'Included Encephalopathy, not elsewhere classified (MAP) as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Encephalopathy as a diagnosis candidate focused on Encephalopathy.'] + recommendations: + ohdsi:194 | [P] Encephalopathy or its presentations | This phenotype directly addresses the study intent of 
patients with encephalopathy, representing the clinical identification of encephalopathy, encompassing various presentations and underlying causes + cipher:2664 | Encephalopathy, not elsewhere classified (MAP) | This phenotype identifies patients based on an ICD code probability cutoff of 0.45, primarily used for diagnosing Encephalopathy, providing an additional approach. + ohdsi:331 | [P] Encephalopathy | This phenotype represents the clinical diagnosis of Encephalopathy, a neurological condition characterized by a range of symptoms, aligning with the study intent. + +CASE 22: Patients with birdshot chorioretinitis + intent_facets_raw: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1223'] + planning_reasoning: ['Selected shortlisted candidates align with birdshot chorioretinitis as a diagnosis-oriented study intent.', 'Included [P] Birdshot chorioretinitis as a diagnosis candidate focused on Uveitis.'] + recommendations: + ohdsi:1223 | [P] Birdshot chorioretinitis | This phenotype is specifically defined for patients with Birdshot chorioretinitis, aligning directly with the study intent of investigating patients with this condition. 
+ +CASE 23: Older adults with macular degeneration + intent_facets_raw: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['macular degeneration'], 'care_setting_cues': ['outpatient'], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['macular degeneration'], 'care_setting_cues': ['outpatient'], 'population_cues': ['older adults']} + planning_shortlist: ['cipher:30295', 'cipher:3006', 'cipher:3005', 'cipher:2505'] + planning_reasoning: ['Selected shortlisted candidates align with macular degeneration as a diagnosis-oriented study intent.', 'Included Macular Degeneration (HDR UK) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration (senile) of retina NOS (MAP) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration, dry (MAP) as a diagnosis candidate focused on Macular Degeneration.'] + recommendations: + cipher:30295 | Macular Degeneration (HDR UK) | This phenotype directly addresses the study intent of older adults with macular degeneration, utilizing ICD10 codes, Med Codes, and Read codes v2 for identification. + cipher:3006 | Macular degeneration (senile) of retina NOS (MAP) | This MAP phenotype identifies patients with Macular Degeneration based on ICD codes as requested by the study intent. 
+ cipher:3005 | Macular degeneration, dry (MAP) | This phenotype identifies patients with dry macular degeneration, aligning with the study intent of older adults with macular degeneration. + +CASE 24: Patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | Identifies the earliest diagnosis of Warm Autoimmune Hemolytic Anemia, providing a clear starting point for the study. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly identifies patients with Autoimmune hemolytic anemia, aligning with the study intent. 
+ cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype provides another definition of Autoimmune hemolytic anemia, sourced from the OHDSI Phenotype Library. + +CASE 25: Patients with MSI-low rectal adenocarcinoma + intent_facets_raw: {'condition_or_topic': 'MSI-low rectal adenocarcinoma', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'cancer patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'MSI-low rectal adenocarcinoma', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'cancer patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: [] + planning_reasoning: ['Selected shortlisted candidates align with MSI-low rectal adenocarcinoma as a diagnosis-oriented study intent.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:845, ohdsi:844, ohdsi:812.'] + recommendations: + +CASE 26: Patients with blistering skin lesions + intent_facets_raw: {'condition_or_topic': 'Blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with skin lesions', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with skin lesions', 'validation_preference': 
'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:652', 'ohdsi:376'] + planning_reasoning: ['Selected shortlisted candidates align with Blistering skin lesions as a diagnosis-oriented study intent.', 'Included [P][R] Vasculitis of the skin as a diagnosis candidate focused on Vasculitis of the skin.', 'Included [P][R] Bleeding skin as a outcome candidate focused on Bleeding Skin.'] + recommendations: + ohdsi:652 | [P][R] Vasculitis of the skin | This phenotype represents events of Vasculitis of the skin, a potential cause of blistering skin lesions, and is specifically targeted as a diagnosis. + ohdsi:376 | [P][R] Bleeding skin | Bleeding skin can manifest as blistering skin lesions, making this phenotype relevant to the study intent. + +CASE 27: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and 
Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype directly addresses the study intent of patients with stomatitis or mucositis. + cipher:15333 | Stomatitis and mucositis (gwPheWAS) | This phenotype also focuses on stomatitis and mucositis, aligning with the study's primary interest. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 28: Patients with neurofibromatosis type 1 + intent_facets_raw: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + planning_reasoning: ['Selected shortlisted candidates align with neurofibromatosis type 1 as a diagnosis-oriented study intent.', 'Included [P][R] Neurofibromatosis type 1 as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.', 
'Included Neurofibromatosis type 1 without Type 2 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.'] + recommendations: + ohdsi:697 | [P][R] Neurofibromatosis type 1 | This phenotype directly reflects the study intent: patients with neurofibromatosis type 1. + ohdsi:304 | Neurofibromatosis type 1 (FP) | Another established phenotype representing the diagnosis of Neurofibromatosis type 1. + ohdsi:305 | Neurofibromatosis type 1 without Type 2 (FP) | This phenotype helps refine the patient selection by excluding those with NF2-related complications. + +CASE 29: Patients with keloid scars + intent_facets_raw: {'condition_or_topic': 'Keloid Scars', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with keloid scars', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Keloid Scars', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with keloid scars', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13930', 'cipher:15610', 'cipher:2950'] + planning_reasoning: ['Selected shortlisted candidates align with Keloid Scars as a diagnosis-oriented study intent.', 'Included Keloid Scar (Phecode) as a diagnosis candidate focused on Keloid Scar.', 'Included Keloid scar (gwPheWAS) as a comorbidity covariate candidate focused on Keloid Scar.', 'Included Keloid scar (MAP) as a comorbidity covariate candidate focused on Keloid Scar.'] + recommendations: + cipher:13930 | Keloid Scar (Phecode) | This Phecode definition based on ICD-9 and ICD-10 codes directly addresses 
the study intent of patients with keloid scars and is classified as a diagnosis phenotype. + cipher:15610 | Keloid scar (gwPheWAS) | This phenotype, utilized in the Million Veteran Program, represents a comorbidity/covariate related to Keloid Scar, aligning with the study's focus on identifying relevant covariates. + cipher:2950 | Keloid scar (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 30: Patients with acetaminophen exposure + intent_facets_raw: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1187', 'ohdsi:1427', 'ohdsi:1158'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen exposure as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.', 'Included [P] Acamprosate, all exposures as a medication based candidate focused on Acamprosate Exposure.', 'Included [P] Aspirin 10 as a medication based candidate focused on Aspirin Exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype directly addresses the study intent of "Patients with acetaminophen exposure" by representing exposure to acetaminophen 
with a 30-day persistence window. + ohdsi:1427 | [P] Acamprosate, all exposures | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1158 | [P] Aspirin 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 31: Patients exposed to rifamycin antibiotics + intent_facets_raw: {'condition_or_topic': 'Rifamycin antibiotics', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Antibiotics'], 'care_setting_cues': ['any patient population'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'Rifamycin antibiotics', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Antibiotics'], 'care_setting_cues': ['any patient population'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + planning_reasoning: ['Selected shortlisted candidates align with Rifamycin antibiotics as a medication based-oriented study intent.', 'Included [P] Antibiotics Rifamycins 10 as a medication based candidate focused on Rifamycins.', 'Included [P] Antibiotics Monobactams 10 as a medication based candidate focused on Antibiotics - Monobactams.', 'Included [P] Antibiotics Cephalosporins 10 as a medication based candidate focused on Antibiotics Cephalosporins.'] + recommendations: + ohdsi:1211 | [P] Antibiotics Rifamycins 10 | This phenotype directly captures exposure to rifamycin antibiotics, aligning with the study intent to investigate patients exposed to these medications. 
+ ohdsi:1207 | [P] Antibiotics Monobactams 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1203 | [P] Antibiotics Cephalosporins 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 32: Patients with a joint or ligament sprain + intent_facets_raw: {'condition_or_topic': 'sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'general patient population', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'general patient population', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:14236', 'cipher:3569', 'cipher:2944', 'cipher:15814'] + planning_reasoning: ['Selected shortlisted candidates align with sprain as a diagnosis-oriented study intent.', 'Included Joint Ligament Sprain (Phecode) as a diagnosis candidate focused on Joint Ligament Sprain.', 'Included Rotator cuff (capsule) sprain (MAP) as a diagnosis candidate focused on Rotator cuff (capsule) sprain.', 'Included Joint-ligament sprain (MAP) as a comorbidity covariate candidate focused on Ligament sprain.'] + recommendations: + cipher:14236 | Joint Ligament Sprain (Phecode) | This phenotype directly aligns with the study intent of patients with a joint or ligament sprain, defined using the Phecode mapping. 
+ cipher:3569 | Rotator cuff (capsule) sprain (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:2944 | Joint-ligament sprain (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 33: Pregnant patients with miscarriage or stillbirth + intent_facets_raw: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant_patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy']} + intent_facets_effective: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant_patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy']} + planning_shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + planning_reasoning: ['Selected shortlisted candidates align with miscarriage|stillbirth as a outcome-oriented study intent.', 'Included [P][R] Miscarriage as a outcome candidate focused on Miscarriage.', 'Included [P] Still birth as a outcome candidate focused on Stillbirth.', 'Included Miscarriage; stillbirth (MAP) as a outcome candidate focused on Miscarriage; Stillbirth.'] + recommendations: + ohdsi:627 | [P][R] Miscarriage | This phenotype directly represents the event of miscarriage, aligning with the study intent of pregnant patients with miscarriage or stillbirth. + ohdsi:1432 | [P] Still birth | This phenotype represents stillbirth, a related outcome within the study intent. 
+ cipher:3056 | Miscarriage; stillbirth (MAP) | This phenotype captures a combined outcome of miscarriage and stillbirth derived from MAP unsupervised clustering; may be useful for exploration. + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + intent_facets_raw: {'condition_or_topic': 'arterial embolism or thrombosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Lower extremity arterial events'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Peripheral arteries']} + intent_facets_effective: {'condition_or_topic': 'arterial embolism or thrombosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Lower extremity arterial events'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Peripheral arteries']} + planning_shortlist: ['cipher:13354', 'cipher:2142'] + planning_reasoning: ['Selected shortlisted candidates align with arterial embolism or thrombosis as a diagnosis-oriented study intent.', 'Included Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) as a complication candidate focused on Arterial Embolism and Thrombosis.', 'Included Arterial embolism and thrombosis of lower extremity artery (MAP) as a complication candidate focused on Arterial Embolism and Thrombosis.'] + recommendations: + cipher:13354 | Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) | Direct mapping of ICD codes to the specified condition, as described in the source dataset. 
+ cipher:2142 | Arterial embolism and thrombosis of lower extremity artery (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + intent_facets_raw: {'condition_or_topic': 'urinary tract infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'new users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['patient'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['new patients']} + intent_facets_effective: {'condition_or_topic': 'urinary tract infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'new users', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['patient'], 'care_setting_cues': ['outpatient care'], 'population_cues': ['new patients']} + planning_shortlist: ['ohdsi:1301', 'ohdsi:1186', 'cipher:31223'] + planning_reasoning: ['Selected shortlisted candidates align with urinary tract infection as a diagnosis-oriented study intent.', 'Included [P] Acute Urinary tract infections UTI events as a diagnosis candidate focused on Urinary Tract Infection.', 'Included [P] Urinary tract infectious 10 as a diagnosis candidate focused on Urinary Tract Infection.', 'Included Urinary Tract Infection (Palin) as a diagnosis candidate focused on Urinary Tract Infection.'] + recommendations: + ohdsi:1301 | [P] Acute Urinary tract infections UTI events | This phenotype directly addresses the study intent of patients with urinary tract infections and new users of cephalosporins. 
+ ohdsi:1186 | [P] Urinary tract infectious 10 | Identifies patients with a diagnosis of Urinary Tract Infection, aligning with the study focus on UTIs. + cipher:31223 | Urinary Tract Infection (Palin) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 36: Patients hospitalized with preinfarction syndrome + intent_facets_raw: {'condition_or_topic': 'Preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient hospitalization'], 'care_setting_cues': ['Inpatient care'], 'population_cues': ['Hospitalized individuals']} + intent_facets_effective: {'condition_or_topic': 'Preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'Hospitalized patients', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient hospitalization'], 'care_setting_cues': ['Inpatient care'], 'population_cues': ['Hospitalized individuals']} + planning_shortlist: ['ohdsi:939'] + planning_reasoning: ['Selected shortlisted candidates align with Preinfarction syndrome as a diagnosis-oriented study intent.', 'Included [P] Hospitalization with preinfarction syndrome as a outcome candidate focused on Hospitalization.'] + recommendations: + ohdsi:939 | [P] Hospitalization with preinfarction syndrome | This phenotype directly represents the study intent: inpatient or ER visits with preinfarction syndrome. 
+ +CASE 37: Patients with a personal history of blood or blood-forming organ disease + intent_facets_raw: {'condition_or_topic': 'Blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:738', 'cipher:3412'] + planning_reasoning: ['Selected shortlisted candidates align with Blood or blood-forming organ disease as a diagnosis-oriented study intent.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Personal history of diseases of blood and blood-forming organs (MAP) as a comorbidity covariate candidate focused on Personal history of diseases of blood and blood-forming organs.'] + recommendations: + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly addresses the study intent of patients with a personal history of blood or blood-forming organ disease, specifically focusing on Autoimmune hemolytic anemia. + cipher:3412 | Personal history of diseases of blood and blood-forming organs (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. 
+ +CASE 38: Patients with benign pancreatic conditions + intent_facets_raw: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + planning_reasoning: ['Selected shortlisted candidates align with pancreatic conditions as a diagnosis-oriented study intent.', 'Included Pancreas Transplant Recipient (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Conditions.', 'Included Extrahepatic Cholangiocarcinoma (Nguyen) as a diagnosis candidate focused on Pancreatic Cancer.', 'Included Chronic Pancreatitis (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Inflammation.'] + recommendations: + cipher:16954 | Pancreas Transplant Recipient (Nguyen) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + cipher:16952 | Extrahepatic Cholangiocarcinoma (Nguyen) | Identifies patients with pancreatic cancer based on ICD-10 codes, aligning with the study intent of investigating prevalence among US veterans. + cipher:16953 | Chronic Pancreatitis (Nguyen) | Identifies veterans with pancreatic inflammation, a covariate for investigating pancreatic cancer prevalence, relevant to the study's focus. 
+ +CASE 39: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:4029', 'cipher:3190'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Thumb Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff of 0.41, aligning with the study intent of identifying patients with primary localized osteoarthritis + cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis focusing on finger OA, a relevant component of localized osteoarthritis and suitable for genetic marker analysis. 
+ cipher:4029 | Thumb Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis focusing on thumb OA, a relevant component of localized osteoarthritis and suitable for genetic marker analysis. + +CASE 40: New users of dihydropyridine calcium channel blockers + intent_facets_raw: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1047', 'ohdsi:1048', 'ohdsi:1052', 'ohdsi:1036'] + planning_reasoning: ['Selected shortlisted candidates align with dihydropyridine calcium channel blockers as a medication based-oriented study intent.', 'Included [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of dihydropyridine calcium channel blockers as a medication based candidate focused on dihydropyridine calcium channel blockers.', 'Included [P] New users of Beta blockers nested in Acute Myocardial Infarction as a medication based candidate focused on Beta Blockers.'] + recommendations: + ohdsi:1047 | [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension | This phenotype directly addresses the study 
intent of identifying new users of dihydropyridine calcium channel blockers for hypertension management. + ohdsi:1048 | [P] New users of dihydropyridine calcium channel blockers | This phenotype represents a direct match to the study intent, focusing on identifying patients newly prescribed these calcium channel blockers. + ohdsi:1052 | [P] New users of Beta blockers nested in Acute Myocardial Infarction | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 41: Veteran patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:13656', 'cipher:17322', 'cipher:18902', 'cipher:31257'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included Renal Sclerosis NOS (VADC) as a comorbidity covariate candidate focused on Renal Sclerosis.', 'Included Renal failure (PERC) as a comorbidity covariate candidate focused on Renal Failure.'] + recommendations: + cipher:13656 | Renal Sclerosis NOS (Phecode) | This phenotype represents a diagnosis of Renal Sclerosis based on ICD-9 and ICD-10 codes, 
aligning directly with the study intent of 'Veteran patients with renal sclerosis'. + cipher:17322 | Renal Sclerosis NOS (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + cipher:18902 | Renal failure (PERC) | This phenotype identifies moderate and severe renal failure in Veterans, which is a relevant comorbidity covariate for studying renal sclerosis. + +CASE 42: Veteran patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Rheumatic disease'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Rheumatic disease'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + planning_shortlist: ['cipher:30277', 'cipher:13992', 'cipher:17453'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (Phecode) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (VADC) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD10 codes and hospitalization 
diagnosis codes, aligning with the study intent of identifying patients with Polymyalgia Rheumatica. + cipher:13992 | Polymyalgia Rheumatica (Phecode) | The Phecode phenotype definition using ICD-9 and ICD-10 codes provides a clinically relevant approach to identifying patients with Polymyalgia Rheumatica. + cipher:17453 | Polymyalgia Rheumatica (VADC) | This phenotype, derived from the VA Data Commons and ICD codes, is appropriate for studying Polymyalgia Rheumatica within the Million Veteran Program. + +CASE 43: Veteran patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm 
Autoimmune Hemolytic Anemia (wAIHA) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly addresses autoimmune hemolytic anemia, a key component of the study intent. + cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype is a recognized OHDSI phenotype for autoimmune hemolytic anemia, aligning with the study population. + +CASE 44: Veteran patients with cardiac complications + intent_facets_raw: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['ohdsi:1081'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac complications as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype directly addresses the study intent of identifying veteran patients with cardiac complications, specifically Acute Myocardial Infarction. 
+ +CASE 45: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:15684', 'cipher:2703', 'cipher:14029'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (MAP) as a diagnosis candidate focused on Fasciitis.', 'Included Fasciitis (Phecode) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS. + cipher:2703 | Fasciitis (MAP) | This phenotype identifies patients with Fasciitis based on a MAP probability score exceeding a defined threshold. + cipher:14029 | Fasciitis (Phecode) | This phenotype represents a diagnosis of Fasciitis based on ICD-9 and ICD-10 codes. It is defined by the Phecode mapping system. 
+ +CASE 46: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Oral disease'], 'care_setting_cues': ['Dental clinic', 'Hospital'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Oral disease'], 'care_setting_cues': ['Dental clinic', 'Hospital'], 'population_cues': ['Patients']} + planning_shortlist: ['cipher:3657', 'cipher:17298', 'cipher:13516'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and Mucositis (Ulcerative) (Phecode) as a comorbidity covariate candidate focused on Stomatitis and Mucositis.'] + recommendations: + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | This phenotype is derived from a MAP algorithm and specifically targets patients with stomatitis and mucositis, aligning directly with the study intent. + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype definition from the VA Data Commons focuses on Stomatitis and Mucositis, representing a validated clinical definition. 
+ cipher:13516 | Stomatitis and Mucositis (Ulcerative) (Phecode) | This phenotype definition, based on the Phecode mapping, represents a documented case of Stomatitis and Mucositis, suitable for research. + +CASE 47: Patients with Barretts esophagus + intent_facets_raw: {'condition_or_topic': "Barrett's esophagus", 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': "Barrett's esophagus", 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13531', 'cipher:15342', 'cipher:2187'] + planning_reasoning: ["Selected shortlisted candidates align with Barrett's esophagus as a diagnosis-oriented study intent.", "Included Barretts Esophagus (Phecode) as a diagnosis candidate focused on Barrett's Esophagus.", "Included Barrett's esophagus (gwPheWAS) as a diagnosis candidate focused on Barrett's esophagus.", "Included Barrett's esophagus (MAP) as a diagnosis candidate focused on Barrett's esophagus."] + recommendations: + cipher:13531 | Barretts Esophagus (Phecode) | This phenotype definition is based on ICD-9 and ICD-10 codes representing Barrett's Esophagus, aligning with the study intent. + cipher:15342 | Barrett's esophagus (gwPheWAS) | This phenotype definition is also based on ICD codes and was used in the Million Veteran Program, making it relevant for this study. 
+ cipher:2187 | Barrett's esophagus (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 48: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:884', 'cipher:3534', 'cipher:13571'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included [P] Diarrhea including enteritis as a diagnosis candidate focused on Diarrhea.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.', 'Included Regional Enteritis (Phecode) as a diagnosis candidate focused on Regional Enteritis.'] + recommendations: + ohdsi:884 | [P] Diarrhea including enteritis | Diarrhea is a common symptom associated with regional enteritis and this phenotype captures the presence of diarrhea. + cipher:3534 | Regional enteritis (MAP) | This phenotype identifies patients with Regional enteritis based on a MAP algorithm probability cutoff. + cipher:13571 | Regional Enteritis (Phecode) | This phenotype identifies patients with Regional Enteritis based on the Phecode grouping using ICD codes. 
+ +CASE 49: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['localized']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'localized', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['localized']} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Osteoarthritis; localized (MAP) as a diagnosis candidate focused on Osteoarthritis localized.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff of 0.41, directly aligning with the study intent of primary localized osteoarthritis. + cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis, specifically focusing on finger OA, which is relevant to the concept of localized osteoarthritis. 
+ cipher:3190 | Osteoarthritis; localized (MAP) | This phenotype represents localized osteoarthritis based on MAP unsupervised clustering of ICD codes, aligning with the study intent of primary localized osteoarthritis. + +CASE 50: Patients with aortic valve disease + intent_facets_raw: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30301', 'cipher:17250', 'cipher:2131', 'cipher:31315', 'ohdsi:1172'] + planning_reasoning: ['Selected shortlisted candidates align with aortic valve disease as a diagnosis-oriented study intent.', 'Included Nonrheumatic Aortic Valve Disorders (HDR UK) as a diagnosis candidate focused on Aortic Valve Disorders.', 'Included Aortic Valve Disease (VADC) as a comorbidity covariate candidate focused on Aortic Valve Disease.', 'Included Aortic valve disease (MAP) as a comorbidity covariate candidate focused on Aortic valve disease.'] + recommendations: + cipher:30301 | Nonrheumatic Aortic Valve Disorders (HDR UK) | This phenotype directly addresses patients with aortic valve disease based on HDR UK criteria and ICD-10 codes. + cipher:17250 | Aortic Valve Disease (VADC) | This phenotype represents Aortic Valve Disease as defined by the VA Data Commons, suitable for assessing comorbidity. 
+ cipher:2131 | Aortic valve disease (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 51: Patients with chronic periodontitis + intent_facets_raw: {'condition_or_topic': 'chronic periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'chronic periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13494', 'cipher:3397', 'cipher:15317', 'cipher:2371'] + planning_reasoning: ['Selected shortlisted candidates align with chronic periodontitis as a diagnosis-oriented study intent.', 'Included Chronic Periodontitis (Phecode) as a diagnosis candidate focused on Chronic Periodontitis.', 'Included Periodontitis (acute or chronic) (MAP) as a diagnosis candidate focused on Periodontitis.', 'Included Chronic periodontitis (gwPheWAS) as a comorbidity covariate candidate focused on Chronic Periodontitis.'] + recommendations: + cipher:13494 | Chronic Periodontitis (Phecode) | This phenotype aligns directly with the study intent of patients with chronic periodontitis, defined using ICD codes and Phecode groupings. + cipher:3397 | Periodontitis (acute or chronic) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ cipher:15317 | Chronic periodontitis (gwPheWAS) | This phenotype represents Chronic Periodontitis as defined within the Million Veteran Program, utilizing ICD codes and associated with GWAS research. + +CASE 52: Patients with hypertensive chronic kidney disease + intent_facets_raw: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:923', 'cipher:2846', 'ohdsi:1191', 'ohdsi:964'] + planning_reasoning: ['Selected shortlisted candidates align with hypertensive chronic kidney disease as a diagnosis-oriented study intent.', 'Included [P] Kidney disease as a diagnosis candidate focused on Chronic Kidney Disease.', 'Included Hypertensive chronic kidney disease (MAP) as a comorbidity covariate candidate focused on Hypertensive Chronic Kidney Disease.', 'Included [P] Chronic kidney disease or end stage renal disease 10 as a comorbidity covariate candidate focused on Chronic Kidney Disease.'] + recommendations: + ohdsi:923 | [P] Kidney disease | This phenotype represents the initial record of Chronic Kidney Disease, aligning with the study intent of patients with hypertensive chronic kidney disease. 
+ cipher:2846 | Hypertensive chronic kidney disease (MAP) | This phenotype identifies patients classified as having Hypertensive Chronic Kidney Disease based on a MAP algorithm, which is relevant to the study intent of hypertensive chronic kidney disease. + ohdsi:1191 | [P] Chronic kidney disease or end stage renal disease 10 | This phenotype represents Chronic Kidney Disease, a relevant covariate for studying outcomes in patients with hypertensive chronic kidney disease. + +CASE 53: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient diagnosis'], 'care_setting_cues': ['Hospitalized patients', 'Clinic visits'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'required', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Patient diagnosis'], 'care_setting_cues': ['Hospitalized patients', 'Clinic visits'], 'population_cues': ['Adult patients']} + planning_shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.'] + recommendations: + cipher:30192 | Other 
Cardiomyopathy (HDR UK) | This phenotype identifies patients with a diagnosis of Other Cardiomyopathy based on ICD10 codes and hospitalizations, aligning with the study intent of patients with cardiomyopathy. + cipher:31252 | Cardiomyopathy (Knight) | This phenotype identifies patients with Cardiomyopathy based on ICD-10, SNOMED CT and Read Codes v2, aligning with the study intent of patients with cardiomyopathy. + ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype identifies patients with Takotsubo cardiomyopathy, which is a specific type of cardiomyopathy, aligning with the study intent. + +CASE 54: Patients with scleritis or episcleritis + intent_facets_raw: {'condition_or_topic': 'scleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'scleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30069', 'cipher:3581'] + planning_reasoning: ['Selected shortlisted candidates align with scleritis as a diagnosis-oriented study intent.', 'Included Scleritis and Episcleritis (HDR UK) as a diagnosis candidate focused on Scleritis and Episcleritis.', 'Included Scleritis and episcleritis (MAP) as a diagnosis candidate focused on Scleritis and episcleritis.'] + recommendations: + cipher:30069 | Scleritis and Episcleritis (HDR UK) | This phenotype directly addresses the study intent of identifying patients with scleritis and episcleritis based on HDR UK 
criteria using ICD-10 codes. + cipher:3581 | Scleritis and episcleritis (MAP) | This phenotype identifies patients with scleritis and episcleritis based on a MAP clustering algorithm using ICD codes, offering an alternative approach to diagnosis. + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + intent_facets_raw: {'condition_or_topic': 'Carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + planning_reasoning: ['Selected shortlisted candidates align with Carbohydrate transport and metabolism disorder as a diagnosis-oriented study intent.', 'Included Other Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on Carbohydrate Transport and Metabolism Disorders.', 'Included Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on Disorders of Carbohydrate Transport and Metabolism.', 'Included Other disorders of carbohydrate transport and metabolism (MAP) as a comorbidity covariate candidate focused on Carbohydrate Transport Metabolism.'] + recommendations: + cipher:12820 | Other Disorders of Carbohydrate Transport and Metabolism (Phecode) | 
This phenotype also relates to carbohydrate transport and metabolism disorders, aligning with the study's focus. + cipher:12818 | Disorders of Carbohydrate Transport and Metabolism (Phecode) | This phenotype directly addresses the study intent of patients with carbohydrate transport and metabolism disorders, as defined by the Phecode system. + cipher:3256 | Other disorders of carbohydrate transport and metabolism (MAP) | This phenotype utilizes an unsupervised clustering approach to identify patients with carbohydrate transport and metabolism disorders based on ICD codes, providing an alternative representation of the + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + intent_facets_raw: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital setting', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['inpatient'], 'population_cues': ['hospital setting']} + intent_facets_effective: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital setting', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['inpatient'], 'population_cues': ['hospital setting']} + planning_shortlist: ['ohdsi:1187'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype represents acetaminophen exposure with a 30-day persistence window, directly 
aligning with the study intent of patients with acetaminophen exposure in the hospital setting. + +CASE 57: Patients diagnosed with dyschromia and vitiligo + intent_facets_raw: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13900', 'cipher:2628', 'ohdsi:471', 'cipher:30727'] + planning_reasoning: ['Selected shortlisted candidates align with dyschromia and vitiligo as a diagnosis-oriented study intent.', 'Included Dyschromia and Vitiligo (Phecode) as a diagnosis candidate focused on Dyschromia and Vitiligo.', 'Included Dyschromia and Vitiligo (MAP) as a diagnosis candidate focused on Dyschromia and Vitiligo.', 'Included [P][R] Vitiligo as a diagnosis candidate focused on Vitiligo.'] + recommendations: + cipher:13900 | Dyschromia and Vitiligo (Phecode) | This Phecode definition based on ICD codes directly addresses the study's focus on Dyschromia and Vitiligo. + cipher:2628 | Dyschromia and Vitiligo (MAP) | This phenotype, generated by MAP, identifies patients based on a probability cutoff, representing a diagnosis of Dyschromia and Vitiligo. + ohdsi:471 | [P][R] Vitiligo | This phenotype represents the diagnosis of Vitiligo, aligning directly with the study intent of patients diagnosed with Dyschromia and Vitiligo. 
+ +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + intent_facets_raw: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + planning_reasoning: ['Selected shortlisted candidates align with acute hepatic injury as a diagnosis-oriented study intent.', 'Included [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure as a diagnosis candidate focused on Acute Liver Injury.', 'Included [P] Acute Hepatic Injury with no pre-existing liver disease as a diagnosis candidate focused on Acute Liver Injury.', 'Included [P] Acute Hepatic Injury or inpatient jaundice as a diagnosis candidate focused on Acute Liver Injury.'] + recommendations: + ohdsi:735 | [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ ohdsi:294 | [P] Acute Hepatic Injury with no pre-existing liver disease | This phenotype directly addresses the study intent by identifying the earliest event of Acute Liver Injury in patients without pre-existing liver disease, excluding those with chronic hepatic failure. + ohdsi:293 | [P] Acute Hepatic Injury or inpatient jaundice | This phenotype captures patients with acute liver injury and includes jaundice, aligning with the study intent of identifying patients with acute liver injury, and focusing on diagnosis of Acute Liver + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + intent_facets_raw: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + planning_reasoning: ['Selected shortlisted candidates align with nerve plexus lesions as a diagnosis-oriented study intent.', 'Included Nerve Plexus Lesions (Phecode) as a diagnosis candidate focused on Nerve Plexus Lesions.', 'Included Nerve Root Lesions (Phecode) as a diagnosis candidate focused on Nerve Root Lesions.', 'Included Nerve plexus lesions (gwPheWAS) as a comorbidity covariate candidate focused on Nerve Plexus Lesions.'] + recommendations: + cipher:13084 | Nerve Plexus Lesions (Phecode) | This phenotype 
aligns directly with the study intent of defining patients with nerve plexus lesions using the Phecode system. + cipher:13085 | Nerve Root Lesions (Phecode) | Given the broad clinical topic of nerve plexus lesions, this Phecode definition offers a related and relevant diagnostic option. + cipher:14974 | Nerve plexus lesions (gwPheWAS) | This phenotype, derived from the Million Veteran Program, represents a comprehensive approach to identifying patients with nerve plexus lesions based on established GWAS findings. + +CASE 60: patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | This phenotype directly aligns with the study intent of patients with a diagnosis of PRES, representing the core condition being investigated. 
+ +CASE 61: patients with chronic ulcerative colitis + intent_facets_raw: {'condition_or_topic': 'ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + planning_reasoning: ['Selected shortlisted candidates align with ulcerative colitis as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included [P][R] Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included [P] Ulcerative colitis or complications as a diagnosis candidate focused on Ulcerative colitis.'] + recommendations: + ohdsi:860 | [P] Earliest event of Ulcerative colitis | This phenotype identifies the earliest diagnosis of Ulcerative colitis, aligning with the study intent of patients with chronic ulcerative colitis. + ohdsi:458 | [P][R] Ulcerative colitis | This phenotype represents all events of Ulcerative colitis, which is relevant to understanding the chronic nature of the condition. + ohdsi:201 | [P] Ulcerative colitis or complications | This phenotype captures Ulcerative colitis and associated complications, providing a comprehensive view of the patient's condition. 
+ +CASE 62: Veteran patients with developmental disorders that are pervasive + intent_facets_raw: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:12996', 'cipher:17197', 'cipher:17193', 'cipher:3415'] + planning_reasoning: ['Selected shortlisted candidates align with developmental disorders as a diagnosis-oriented study intent.', 'Included Pervasive Developmental Disorders (Phecode) as a diagnosis candidate focused on Pervasive Developmental Disorders.', 'Included Develomental Delays and Disorders (VADC) as a comorbidity covariate candidate focused on Developmental Delays and Disorders.', 'Included Pervasive Developmental Disorders (VADC) as a comorbidity covariate candidate focused on Pervasive Developmental Disorders.'] + recommendations: + cipher:12996 | Pervasive Developmental Disorders (Phecode) | This phenotype directly addresses the study intent of identifying patients with Pervasive Developmental Disorders based on ICD codes. + cipher:17197 | Develomental Delays and Disorders (VADC) | This phenotype captures developmental delays and disorders, aligning with the broad clinical topic of 'developmental disorders' specified in the intent. 
+ cipher:17193 | Pervasive Developmental Disorders (VADC) | This phenotype focuses on Pervasive Developmental Disorders, providing a more specific representation of the target condition. + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + intent_facets_raw: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'multiple diagnoses', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses'], 'care_setting_cues': ['inpatient', 'ed', 'any'], 'population_cues': ['multiple diagnoses']} + intent_facets_effective: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'multiple diagnoses', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses'], 'care_setting_cues': ['inpatient', 'ed', 'any'], 'population_cues': ['multiple diagnoses']} + planning_shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + planning_reasoning: ['Selected shortlisted candidates align with acute myocardial infarction as a diagnosis-oriented study intent.', 'Included [P][R] Acute myocardial infarction as a diagnosis candidate focused on Acute myocardial infarction.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.', 'Included Acute Myocardial Infarction (VA CAUSAL Methods) as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:510 | [P][R] Acute myocardial infarction | This phenotype directly identifies patients with acute myocardial infarction, aligning with the study intent of 
patients with at least 2 recorded diagnoses. + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype represents another way to identify acute myocardial infarction diagnoses and meets the study criteria. + cipher:18982 | Acute Myocardial Infarction (VA CAUSAL Methods) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:632', 'ohdsi:781'] + planning_reasoning: ['Selected shortlisted candidates align with antiphospholipid syndrome as a diagnosis-oriented study intent.', 'Included [P][R] Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid syndrome.', 'Included [P] Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid Syndrome.'] + recommendations: + ohdsi:632 | [P][R] Antiphospholipid syndrome | This phenotype represents all events of Antiphospholipid syndrome, aligning with the study intent of patients diagnosed with this condition in the 
outpatient setting. + ohdsi:781 | [P] Antiphospholipid syndrome | This phenotype represents the first occurrence of Antiphospholipid syndrome, a relevant comorbidity covariate for the specified patient population in an outpatient setting. + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + intent_facets_raw: {'condition_or_topic': 'ADRD or dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'ADRD or dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + planning_shortlist: ['ohdsi:864', 'ohdsi:651', 'cipher:31120'] + planning_reasoning: ['Selected shortlisted candidates align with ADRD or dementia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Dementia as a diagnosis candidate focused on Dementia.', 'Included [P][R] Dementia as a diagnosis candidate focused on Dementia.', 'Included Dementia Alzheimer Vascular Mixed Nonspecific (Hoile) as a diagnosis candidate focused on Dementia.'] + recommendations: + ohdsi:864 | [P] Earliest event of Dementia | This phenotype directly identifies dementia, aligning with the study intent of older adults with a likely diagnosis of ADRD or late-stage dementia. + ohdsi:651 | [P][R] Dementia | This phenotype represents Dementia, a condition primarily used for diagnosis, and is a relevant phenotype for the study intent. 
+ cipher:31120 | Dementia Alzheimer Vascular Mixed Nonspecific (Hoile) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 66: patients who experienced a GI bleed adverse event + intent_facets_raw: {'condition_or_topic': 'Gastrointestinal bleeding', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Gastrointestinal bleeding', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:888', 'ohdsi:417', 'ohdsi:349', 'ohdsi:77'] + planning_reasoning: ['Selected shortlisted candidates align with Gastrointestinal bleeding as a outcome-oriented study intent.', 'Included [P] Gastrointestinal bleeding as a outcome candidate focused on Gastrointestinal Bleeding.', 'Included [P] Acute gastrointestinal bleeding events as a outcome candidate focused on Gastrointestinal Bleeding.', 'Included [P] Lower gastrointestinal bleeding events as a outcome candidate focused on Gastrointestinal Bleeding.'] + recommendations: + ohdsi:888 | [P] Gastrointestinal bleeding | This phenotype identifies patients experiencing Gastrointestinal Bleeding, primarily in inpatient or ER settings. It is an outcome measure as specified in the study intent. + ohdsi:417 | [P] Acute gastrointestinal bleeding events | Represents events of gastrointestinal bleed, primarily an outcome measure. Aligns with the study intent of identifying patients with gastrointestinal bleed events. 
+ ohdsi:349 | [P] Lower gastrointestinal bleeding events | Represents lower gastrointestinal bleeding events, a patient outcome. Relevant to the study intent of investigating gastrointestinal bleeding. + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:678', 'cipher:31308'] + planning_reasoning: ['Selected shortlisted candidates align with COVID-19 as a diagnosis-oriented study intent.', 'Included [P][R] COVID-19 as a diagnosis candidate focused on COVID-19.', 'Included Confirmed COVID-19 Diagnoses (Knight) as a diagnosis candidate focused on Confirmed COVID-19.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:47.'] + recommendations: + ohdsi:678 | [P][R] COVID-19 | This phenotype represents the diagnosis of COVID-19, aligning directly with the study intent of patients who received a COVID-19 diagnosis in the outpatient setting. + cipher:31308 | Confirmed COVID-19 Diagnoses (Knight) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 68: veterans who experienced an abdominal aortic aneurysm + intent_facets_raw: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:29240', 'cipher:29169'] + planning_reasoning: ['Selected shortlisted candidates align with abdominal aortic aneurysm as a diagnosis-oriented study intent.', 'Included Abdominal Aortic Aneurysm (PheKB) as a diagnosis candidate focused on Abdominal Aortic Aneurysm (AAA).', 'Included Abdominal Aortic Aneurysm (HDR UK) as a diagnosis candidate focused on Abdominal Aortic Aneurysm (AAA).', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:1093, ohdsi:866, ohdsi:1290.'] + recommendations: + cipher:29240 | Abdominal Aortic Aneurysm (PheKB) | This phenotype directly addresses the study intent of identifying veterans with abdominal aortic aneurysms, based on diagnostic codes and procedural history. + cipher:29169 | Abdominal Aortic Aneurysm (HDR UK) | This phenotype also identifies patients diagnosed with an Abdominal Aortic Aneurysm, utilizing a variety of codes and records relevant to the study's focus. 
+ +CASE 69: patients with COPD according to diagnostic codes in the EHR + intent_facets_raw: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:29794', 'cipher:4241', 'cipher:29756', 'cipher:31297'] + planning_reasoning: ['Selected shortlisted candidates align with COPD as a diagnosis-oriented study intent.', 'Included Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) as a diagnosis candidate focused on COPD.', 'Included Chronic Obstructive Pulmonary Disease, (VINCI) as a diagnosis candidate focused on COPD.', 'Included Chronic Obstructive Pulmonary Disease (HR UK) as a diagnosis candidate focused on Chronic Obstructive Pulmonary Disease (COPD).'] + recommendations: + cipher:29794 | Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) | This phenotype directly addresses the study intent by identifying patients with COPD based on ICD, Read, and Med Code diagnoses. + cipher:4241 | Chronic Obstructive Pulmonary Disease, (VINCI) | This phenotype aligns with the study's focus on identifying patients diagnosed with COPD within the VA system. + cipher:29756 | Chronic Obstructive Pulmonary Disease (HR UK) | This phenotype represents a diagnosis of COPD based on multiple coding systems, relevant to the study's diagnostic criteria. 
+ +CASE 70: patients hospitalized at least once for heart failure + intent_facets_raw: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'hospitalized', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'hospitalized', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + planning_reasoning: ['Selected shortlisted candidates align with heart failure as a diagnosis-oriented study intent.', 'Included [P] Heart failure2 as a diagnosis candidate focused on Heart Failure.', 'Included [P] Acute Heart failure from legend as a diagnosis candidate focused on Heart Failure.', 'Included Heart Failure (BOS CSPCC) as a diagnosis candidate focused on Heart Failure.'] + recommendations: + ohdsi:934 | [P] Heart failure2 | This phenotype represents the initial diagnosis of Heart Failure, aligning with the study intent of identifying patients hospitalized for heart failure. + ohdsi:1303 | [P] Acute Heart failure from legend | This phenotype identifies the first recorded episode of heart failure in a patient, followed by at least one subsequent heart failure condition record, fitting the study intent. + cipher:16152 | Heart Failure (BOS CSPCC) | This phenotype identifies patients with Heart Failure based on ICD-10 diagnostic codes, relevant to the study's focus. 
+ +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + intent_facets_raw: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30803', 'cipher:31250', 'cipher:31195'] + planning_reasoning: ['Selected shortlisted candidates align with diabetes as a medication based-oriented study intent.', 'Included Meglitinide (Kontopantelis) as a medication based candidate focused on Meglitinide Use in Type 2 Diabetes.', 'Included Diabetes and Diabates Medication (Knight) as a comorbidity covariate candidate focused on Diabetes.', 'Included Diabetes, Drug Code (Paige) as a comorbidity covariate candidate focused on Diabetes.'] + recommendations: + cipher:30803 | Meglitinide (Kontopantelis) | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + cipher:31250 | Diabetes and Diabates Medication (Knight) | This phenotype directly addresses diabetes as a comorbidity covariate and aligns with the study intent of identifying patients with diabetes. + cipher:31195 | Diabetes, Drug Code (Paige) | This phenotype uses Read codes v2, a common approach for identifying diabetes as a comorbidity covariate, fitting the study intent. 
+ +CASE 1: Patients with an implanted cardiac defibrillator + cipher:2288 | 15.6325 | [('topic_primary', 'Cardiac defibrillator in situ')] + cipher:13288 | 15.1125 | [('topic_primary', 'Cardiac Defibrillator in Situ')] + cipher:30773 | -5.3875 | [('topic_mismatch', 'Trifascicular Block')] + cipher:16289 | -5.3875 | [('topic_mismatch', 'Bleeding')] + cipher:30192 | -5.3875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:31291 | -12.3875 | [('topic_mismatch', 'Life Threatening Arrhythmias')] + cipher:30617 | -12.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + ohdsi:1102 | -21.65 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 2: Patients diagnosed with fasciitis + cipher:15684 | 35.6725 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6325 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 3: Patients with acute prostatitis + ohdsi:283 | 34.35 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:13720 | 28.9425 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:2054 | 28.8625 | [('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:15499 | 15.1725 | [('topic_primary', 'Acute Prostatitis')] + cipher:18650 | 13.7554 | 
[('topic_primary', 'Chronic Prostatitis or Chronic Pelvic Pain Syndrome (MVP)')] + ohdsi:1301 | -5.21 | [('topic_mismatch', 'Urinary Tract Infection')] + ohdsi:410 | -5.23 | [('topic_mismatch', 'Urinary Tract Infection')] + cipher:30181 | -5.3875 | [('topic_mismatch', 'Non-Acute Cystitis')] + +CASE 4: Patients who underwent esophagectomy + ohdsi:1097 | 32.1233 | [('topic_primary', 'Esophagectomy'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Esophagectomy"]}')] + ohdsi:1294 | 26.33 | [('topic_primary', 'Esophagectomy')] + ohdsi:870 | 26.31 | [('topic_primary', 'Esophagectomy')] + ohdsi:1309 | 22.35 | [('topic_primary', 'Esophagectomy')] + ohdsi:877 | -5.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + cipher:30087 | -9.3875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 5: Patients diagnosed with peripheral neuritis + ohdsi:388 | 35.85 | [('topic_primary', 'Peripheral neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral neuritis"]}')] + ohdsi:389 | 20.08 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathy"]}')] + ohdsi:238 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + ohdsi:540 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + cipher:30768 | 19.8825 | [('topic_primary', 'Peripheral Neuropathies'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathies"]}')] + ohdsi:236 | 19.6233 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic Peripheral Neuropathy"]}')] + 
ohdsi:541 | 18.31 | [('topic_primary', 'Idiopathic peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic peripheral neuropathy"]}')] + ohdsi:623 | -3.75 | [('topic_mismatch', 'Motor neuropathy')] + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + ohdsi:760 | 27.6871 | [('topic_primary', 'IL-23 Inhibitors')] + ohdsi:1042 | 27.6271 | [('topic_primary', 'IL-23 inhibitors')] + ohdsi:1040 | 27.6071 | [('topic_primary', 'TNF alpha inhibitors')] + ohdsi:1069 | 25.5 | [('topic_primary', 'TNF inhibitors'), ('topic_context', '{"context_conditions": ["Crohns disease"], "target_conditions": ["TNF inhibitors"]}')] + ohdsi:759 | 23.6671 | [('topic_primary', 'TNF-alpha Inhibitors, IL23 Inhibitors')] + ohdsi:1057 | 20.3614 | [('topic_primary', 'IL-23 inhibitors'), ('topic_context', '{"context_conditions": ["Plaque psoriasis", "Psoriasis vulgaris"], "target_conditions": ["IL23 inhibitors"]}')] + ohdsi:1066 | 19.9881 | [('topic_primary', 'Tumor Necrosis Factor alpha (TNFa) inhibitors')] + ohdsi:1068 | 19.75 | [('topic_primary', 'Tumor Necrosis Factor alpha (TNFa) inhibitors'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Tumor Necrosis Factor alpha (TNFa) inhibitors"]}')] + +CASE 7: Patients with allergic rhinitis + ohdsi:508 | 34.35 | [('topic_primary', 'Allergic rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic rhinitis"]}')] + ohdsi:367 | 34.33 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:2081 | 34.1125 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:30258 | 28.6725 | [('topic_primary', 'Allergic and Chronic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic and chronic 
rhinitis diagnosis"]}')] + cipher:13411 | 26.6125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:12 | 21.7067 | [('topic_primary', 'Rhinitis'), ('topic_context', '{"context_conditions": ["Common cold", "Sinusitis", "Respiratory Symptoms"], "target_conditions": ["Rhinitis"]}')] + cipher:15246 | 19.1125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:370 | 14.75 | [('topic_primary', 'Allergic Disorder')] + +CASE 8: Patients with ischemic heart disease + ohdsi:654 | 34.31 | [('topic_primary', 'Ischemic heart disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic heart disease"]}')] + cipher:16261 | 34.1125 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29560 | 34.0375 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + cipher:29218 | 23.6525 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:29772 | 23.6125 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:30610 | 23.6125 | [('topic_primary', 'Ischaemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischaemic Heart Disease"]}')] + cipher:30617 | 11.2125 | [('topic_primary', 'Coronary Heart Disease')] + cipher:31868 | 9.7792 | [('topic_primary', 'Chronic Ischaemic Heart Disease')] + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + cipher:13824 | 18.1725 | [('topic_primary', 'Early Labor Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Early Labor Hemorrhage"]}')] + cipher:2643 | 16.6411 | [('topic_primary', 'Early or threatened labor; hemorrhage in early pregnancy')] 
+ cipher:2798 | 13.7375 | [('topic_primary', 'Hemorrhage in early pregnancy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hemorrhage in early pregnancy"]}')] + cipher:17376 | 10.4458 | [('topic_primary', 'Hemorrhage')] + cipher:15566 | 9.3625 | [('topic_primary', 'Pregnancy Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pregnancy Hemorrhage"]}')] + cipher:13827 | 4.4458 | [('topic_primary', 'Hemorrhage in Early Pregnancy')] + ohdsi:677 | 4.375 | [('topic_primary', 'Preterm labor with preterm delivery'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Preterm labor with preterm delivery"]}')] + cipher:2796 | 3.7792 | [('topic_primary', 'Hemorrhage during pregnancy; childbirth and postpartum')] + +CASE 10: Patients who underwent lung resection + ohdsi:1268 | 30.1433 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative Afib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:1308 | 26.0833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative AFib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:869 | 24.29 | [('topic_primary', 'Lung Resection')] + ohdsi:1293 | 24.27 | [('topic_primary', 'Lung Resection')] + ohdsi:1289 | -7.75 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -7.75 | [('topic_mismatch', 'Surgery')] + ohdsi:877 | -7.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1106 | -7.75 | [('topic_mismatch', 'Surgery')] + +CASE 11: Patients with laryngitis + ohdsi:355 | 34.35 | [('topic_primary', 'Laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Laryngitis"]}')] + cipher:2046 | 20.6125 | [('topic_primary', 'Acute laryngitis and tracheitis')] + ohdsi:327 | -5.19 | [('topic_mismatch', 'Pharyngitis')] + ohdsi:9 | -5.25 | [('topic_mismatch', 'Sore throat')] + ohdsi:352 | -5.25 | 
[('topic_mismatch', 'Inflamed Tonsils')] + cipher:29206 | -5.3075 | [('topic_mismatch', 'Peritonsillar Abscess')] + cipher:29553 | -5.3675 | [('topic_mismatch', 'Sleep Apnea')] + cipher:31166 | -5.3875 | [('topic_mismatch', 'Sore Throat')] + +CASE 12: Patients with regional enteritis + cipher:3534 | 35.6725 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:501 | -3.75 | [('topic_mismatch', "Crohn's disease")] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 13: Patients with renal sclerosis + cipher:13646 | 28.9025 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 26.6725 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 19.1125 | [('topic_primary', 'Renal Sclerosis')] + ohdsi:1003 | 18.5 | [('topic_primary', 'Renal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal cancer"]}')] + ohdsi:481 | 16.75 | [('topic_primary', 'Renal failure syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal failure syndrome"]}')] + ohdsi:467 | 7.25 | [('topic_primary', 'Systemic sclerosis')] + cipher:31257 | 7.1125 | [('topic_primary', 'Renal disease')] + cipher:30292 | -5.3875 | [('topic_mismatch', 'Glomerulonephritis')] + +CASE 14: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + 
cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + cipher:31280 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 15: Patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:229 | -3.75 | [('topic_mismatch', 'Progressive Multifocal Leukoencephalopathy (PML)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + +CASE 16: Patients with anorexia nervosa + ohdsi:1340 | 34.29 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:17187 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:2117 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:12990 | 26.6125 | [('topic_primary', 'Anorexia Nervosa')] + ohdsi:1339 | 18.5 | [('topic_primary', 'Bulimia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Bulimia Nervosa"]}')] + cipher:30163 | 5.9425 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia and Bulimia Nervosa"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1341 | -5.25 | [('topic_mismatch', 'Eating Disorders')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 17: Patients with dizziness, vertigo, or motion sickness + cipher:3402 | 19.8625 | [('topic_primary', 'Vertigo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vertigo"]}')] + cipher:13215 | 18.8125 | [('topic_primary', 'Dizziness and Giddiness (Lightheadedness and Vertigo)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dizziness and Giddiness (Lightheadedness and Vertigo)"]}')] + ohdsi:893 | 16.29 | [('topic_primary', 'Vertigo')] + cipher:2623 | 14.7792 | [('topic_primary', 'Dizziness and giddiness (Light-headedness and vertigo)')] + ohdsi:244 | 12.35 | [('topic_primary', 'Dizziness')] + cipher:15084 | 6.7792 | [('topic_primary', 'Dizziness and giddiness')] + cipher:4387 | -3.8675 | [('topic_mismatch', 'Vestibular Disorders')] + ohdsi:891 | -7.69 | [('topic_mismatch', 'Nausea')] + +CASE 18: Patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 26.6325 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 19.1125 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.25 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | 
-5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + ohdsi:605 | -12.25 | [('topic_mismatch', 'Muscle pain')] + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + cipher:2915 | 18.1125 | [('topic_primary', 'Insulin Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Insulin Adverse Effects"]}')] + cipher:2064 | 15.3925 | [('topic_primary', 'Adrenal Cortical Steroids Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Adrenal Cortical Steroids Adverse Effects"]}')] + cipher:2123 | 15.3125 | [('topic_primary', 'Lipid-Lowering Drug Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Lipid-Lowering Drug Adverse Effects"]}')] + cipher:3573 | 13.1125 | [('topic_primary', 'Salicylates adverse effects')] + cipher:2125 | 12.6125 | [('topic_primary', 'Antirheumatics adverse effects')] + cipher:14303 | 11.8392 | [('topic_primary', 'Adrenal Steroid Adverse Effects')] + cipher:17565 | 11.0192 | [('topic_primary', 'Adverse Effects of Adrenal Steroids')] + cipher:3180 | 10.4792 | [('topic_primary', 'Opiate Use and Adverse Effects')] + +CASE 20: Patients with low blood pressure + cipher:13390 | 35.6125 | [('topic_primary', 'Hypotension'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypotension"]}')] + ohdsi:339 | 21.33 | [('topic_primary', 'Hypotension')] + ohdsi:890 | 21.31 | [('topic_primary', 'Hypotension')] + ohdsi:526 | 17.29 | [('topic_primary', 'Orthostatic hypotension')] + ohdsi:997 | 9.35 | [('topic_primary', 'Hypotension')] + ohdsi:954 | -3.75 | [('topic_mismatch', 'Syncope')] + ohdsi:1075 | -3.75 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:445 | -10.75 | [('topic_mismatch', 'Hypoglycemia')] + +CASE 21: Patients with encephalopathy + ohdsi:194 | 34.33 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": 
["Encephalopathy"]}')] + cipher:2664 | 34.1125 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:331 | 32.31 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:223 | 25.7867 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)'), ('topic_context', '{"context_conditions": ["Eclampsia", "Hypertensive encephalopathy"], "target_conditions": ["Posterior reversible encepha... [truncated 19 chars]')] + ohdsi:936 | -2.3929 | [('topic_context', '{"context_conditions": ["Hepatic necrosis", "Hepatic coma", "Hepatic encephalopathy", "Liver failure", "Liver injury"], ... [truncated 41 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1333 | -5.21 | [('topic_mismatch', 'Advanced Liver Disease')] + ohdsi:544 | -5.25 | [('topic_mismatch', 'Encephalitis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 22: Patients with birdshot chorioretinitis + ohdsi:1223 | 8.0167 | [('topic_context', '{"context_conditions": ["Uveitis"], "target_conditions": ["Birdshot chorioretinitis"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1226 | -3.73 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.75 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:755 | -3.75 | [('topic_mismatch', 'Uveitis')] + cipher:30185 | -3.8875 | [('topic_mismatch', 'Posterior Uveitis')] + cipher:13118 | -3.8875 | [('topic_mismatch', 'Chorioretinal Inflammations Scars')] + cipher:2341 | -3.8875 | [('topic_mismatch', 'Chorioretinal inflammations, scars, and other disorders of choroid')] + ohdsi:1225 | -10.69 | [('topic_mismatch', 'Uveitis')] + +CASE 23: Older adults with macular degeneration + cipher:30295 | 34.1725 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular 
Degeneration"]}')] + cipher:3006 | 34.1325 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3005 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:2505 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:16256 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:14995 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3007 | 30.6125 | [('topic_primary', 'Macular Degeneration, Wet'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration, Wet"]}')] + ohdsi:536 | 29.1 | [('topic_primary', 'Age related macular degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Age related macular degeneration"]}')] + +CASE 24: Patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic 
anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:17112 | 17.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + ohdsi:497 | 16.5 | [('topic_primary', 'Autoimmune hepatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hepatitis"]}')] + cipher:18439 | 16.3625 | [('topic_primary', 'Aplastic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aplastic Anemia"]}')] + +CASE 25: Patients with MSI-low rectal adenocarcinoma + ohdsi:845 | 13.27 | [('topic_primary', 'Rectal Adenocarcinoma'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Rectal Adenocarcinoma"]}')] + ohdsi:844 | 6.83 | [('topic_primary', 'Rectal Cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Rectal Cancer"]}')] + ohdsi:812 | -4.21 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Primary Adenocarcinoma"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:843 | -4.775 | [('topic_context', '{"context_conditions": [], "target_conditions": ["primary adenocarcinoma of rectum"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:821 | -5.0829 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Primary adenocarcinoma of the colon or rectum"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:819 | -9.0833 | [('topic_context', '{"context_conditions": ["MSI-L", "MSI-indeterminate", "MSS", "pMMR"], "target_conditions": ["Colorectal Cancer"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:823 | -11.75 | [('topic_mismatch', 'Colorectal Cancer')] + ohdsi:836 | -11.75 | [('topic_mismatch', 'colorectal cancer')] + +CASE 26: Patients with blistering skin lesions + ohdsi:652 | 13.875 | 
[('topic_primary', 'Vasculitis of the skin'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vasculitis of the skin"]}')] + ohdsi:376 | 6.6433 | [('topic_primary', 'Bleeding Skin')] + ohdsi:1168 | 5.54 | [('topic_primary', 'Skin Ulcer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Skin Ulcer"]}')] + ohdsi:414 | 5.25 | [('topic_primary', 'Skin Eruption Symptoms')] + ohdsi:948 | -3.73 | [('topic_mismatch', 'Rash')] + ohdsi:461 | -3.75 | [('topic_mismatch', 'Erythema multiforme')] + ohdsi:462 | -3.75 | [('topic_mismatch', 'Lichen planus')] + cipher:4016 | -3.8875 | [('topic_mismatch', 'Desquamative Rash')] + +CASE 27: Patients with stomatitis or mucositis + cipher:3657 | 27.7575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 28: Patients with neurofibromatosis type 1 + ohdsi:697 | 35.85 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:304 | 35.83 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:305 | 35.77 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:696 | 
20.81 | [('topic_primary', 'Neurofibromatosis type 2')] + cipher:12649 | 20.1125 | [('topic_primary', 'Neurofibromatosis')] + ohdsi:698 | 20.04 | [('topic_primary', 'Neurofibromatosis syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis syndrome"]}')] + ohdsi:308 | 15.6667 | [('topic_primary', 'Neurofibromatosis'), ('topic_context', '{"context_conditions": ["MRI of Brain", "Ophthalmology Visits"], "target_conditions": ["Neurofibromatosis"]}')] + ohdsi:306 | 13.85 | [('topic_primary', 'Optic Pathway Glioma and Neurofibromatosis')] + +CASE 29: Patients with keloid scars + cipher:13930 | 16.1125 | [('topic_primary', 'Keloid Scar')] + cipher:15610 | 8.6725 | [('topic_primary', 'Keloid Scar')] + cipher:2950 | 8.6125 | [('topic_primary', 'Keloid Scar')] + cipher:18443 | -10.8875 | [('topic_mismatch', 'Severe Cutaneous Adverse Reaction (SCAR)')] + cipher:30650 | -11.3875 | [('topic_mismatch', 'Smoking Status')] + ohdsi:1168 | -14.67 | [('topic_mismatch', 'Skin Ulcer')] + ohdsi:1215 | -14.71 | [('topic_mismatch', 'Cancer')] + ohdsi:1102 | -20.23 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 30: Patients with acetaminophen exposure + ohdsi:1187 | 32.85 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1427 | 28.5 | [('topic_primary', 'Acamprosate Exposure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acamprosate Exposure"]}')] + ohdsi:1158 | 20.83 | [('topic_primary', 'Aspirin Exposure')] + cipher:31254 | -15.3875 | [('topic_mismatch', 'Liver Disease')] + cipher:30616 | -15.3875 | [('topic_mismatch', 'Substance Misuse')] + ohdsi:1423 | -17.73 | [('topic_mismatch', 'Acute Intoxication')] + ohdsi:735 | -17.75 | [('topic_mismatch', 'Acute Liver Injury')] + ohdsi:1425 | -17.75 | [('topic_mismatch', 'Alcohol Intoxication')] + +CASE 31: Patients exposed to rifamycin antibiotics + ohdsi:1207 | 24.0833 | [('topic_primary', 'Antibiotics - Monobactams'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Antibiotics - Monobactams", "Aztreonam"]}')] + ohdsi:1203 | 20.75 | [('topic_primary', 'Antibiotics Cephalosporins')] + ohdsi:1211 | 0.85 | [('topic_mismatch', 'Rifamycins')] + ohdsi:1213 | 0.83 | [('topic_mismatch', 'Streptogramins')] + ohdsi:1206 | 0.81 | [('topic_mismatch', 'Macrolide Drug Exposure')] + ohdsi:1202 | 0.77 | [('topic_mismatch', 'Carbapenems')] + ohdsi:1210 | 0.75 | [('topic_primary', 'Antibiotics Persistence')] + ohdsi:1212 | 0.75 | [('topic_mismatch', 'Sulfonamides')] + +CASE 32: Patients with a joint or ligament sprain + cipher:14236 | 27.1925 | [('topic_primary', 'Joint Ligament Sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Joint Ligament Sprain"]}')] + cipher:3569 | 26.2375 | [('topic_primary', 'Rotator cuff (capsule) sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Rotator cuff (capsule) sprain"]}')] + cipher:2944 | 15.2125 | [('topic_primary', 'Ligament sprain')] + cipher:15814 | 13.8192 | [('topic_primary', 'Joint Ligament Sprain')] + cipher:3072 | 13.7792 | [('topic_primary', 'Muscle-tendon sprain')] + cipher:15813 | 13.1125 | [('topic_primary', 'Rotator cuff (capsule) sprain')] + ohdsi:363 | -5.23 | [('topic_mismatch', 'Joint stiffness')] + cipher:29559 | -9.3275 | [('topic_mismatch', 'Musculoskeletal Pain and Injury')] + +CASE 33: Pregnant patients with miscarriage or stillbirth + cipher:3056 | 31.6525 | [('topic_primary', 'Miscarriage; Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:15565 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:13818 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + ohdsi:627 | 21.33 | 
[('topic_primary', 'Miscarriage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage"]}')] + ohdsi:1432 | 16.31 | [('topic_primary', 'Stillbirth')] + ohdsi:606 | 16.25 | [('topic_primary', 'Stillbirth')] + ohdsi:1434 | -7.65 | [('topic_mismatch', 'Pregnancy Loss')] + ohdsi:1431 | -7.73 | [('topic_mismatch', 'Ectopic Pregnancy')] + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + cipher:13354 | 19.2775 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:2142 | 19.2375 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:31817 | 15.6125 | [('topic_primary', 'Embolism or Thrombosis')] + cipher:31293 | 13.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:15204 | 13.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:31819 | 12.6925 | [('topic_primary', 'Arterial Embolism, Upper Extremity'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism"]}')] + ohdsi:1090 | 6.25 | [('topic_primary', 'Pulmonary Embolism'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pulmonary Embolism"]}')] + cipher:31820 | -12.7875 | [('topic_mismatch', 'Lower Extremity Vascular Disease')] + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + cipher:31223 | 34.1125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + cipher:30639 | 34.1125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1301 | 30.6389 | [('topic_primary', 'Urinary Tract Infection'), 
('topic_context', '{"context_conditions": ["Cystitis", "Asymptomatic bacteriuria"], "target_conditions": ["Acute Urinary tract infections U... [truncated 12 chars]')] + ohdsi:1186 | 30.31 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:861 | 21.0 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Pyuria", "Bacteriuria", "Cystitis"], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1061 | -9.15 | [('topic_mismatch', 'Cephalosporin Exposure')] + ohdsi:1060 | -9.17 | [('topic_mismatch', 'Fluoroquinolone Use')] + ohdsi:1044 | -9.21 | [('topic_mismatch', 'Cephalosporin Use')] + +CASE 36: Patients hospitalized with preinfarction syndrome + ohdsi:939 | -1.4357 | [('topic_context', '{"context_conditions": ["Preinfarction Syndrome", "Emergency Room Visit", "Inpatient Visit"], "target_conditions": ["Hos... [truncated 15 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1081 | -1.73 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30748 | -1.8875 | [('topic_mismatch', 'Myocardial Infarction')] + cipher:29772 | -5.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:30101 | -5.3875 | [('topic_mismatch', 'Myocardial Infarction')] + ohdsi:263 | -8.75 | [('topic_mismatch', 'Unstable Angina and NSTEMI')] + ohdsi:260 | -8.75 | [('topic_mismatch', 'ST elevation myocardial infarction')] + cipher:30617 | -12.8275 | [('topic_mismatch', 'Coronary Heart Disease')] + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + cipher:3412 | 5.0325 | [('topic_primary', 'Personal history of diseases of blood and blood-forming organs')] + ohdsi:738 | -3.75 | [('topic_mismatch', 'Autoimmune hemolytic anemia')] + cipher:18428 | -3.8475 | [('topic_mismatch', 'Pancytopenia')] + cipher:30246 | -3.8875 | [('topic_mismatch', 'Aplastic Anaemias')] + 
cipher:30138 | -3.8875 | [('topic_mismatch', 'Hyposplenism')] + cipher:30287 | -3.8875 | [('topic_mismatch', 'Myelodysplastic Syndromes')] + cipher:29220 | -3.8875 | [('topic_mismatch', 'Anemias, Other')] + cipher:30672 | -3.8875 | [('topic_mismatch', 'Thalassaemia Trait')] + +CASE 38: Patients with benign pancreatic conditions + cipher:16955 | 20.7125 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16954 | 20.6725 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16947 | 20.6125 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16952 | 19.2775 | [('topic_primary', 'Pancreatic Cancer'), ('topic_context', '{"context_conditions": ["PSC", "IBD"], "target_conditions": ["Pancreatic Cancer"]}')] + cipher:16953 | 8.6125 | [('topic_primary', 'Pancreatic Inflammation')] + ohdsi:496 | -3.75 | [('topic_mismatch', 'Abdominal Pain')] + cipher:30223 | -3.8875 | [('topic_mismatch', 'Benign Neoplasm of Stomach and Duodenum')] + cipher:30238 | -3.8875 | [('topic_mismatch', 'Pancreatitis')] + +CASE 39: Patients with primary localized osteoarthritis + cipher:3192 | 34.1525 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative 
Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 40: New users of dihydropyridine calcium channel blockers + ohdsi:1047 | 44.33 | [('topic_primary', 'dihydropyridine calcium channel blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["dihydropyridine calcium channel blockers"]}')] + ohdsi:1048 | 36.85 | [('topic_primary', 'dihydropyridine calcium channel blockers')] + ohdsi:1036 | 23.31 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1049 | 22.645 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Hypertension", "Essential Hypertension"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1052 | 22.54 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Acute Myocardial Infarction"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1046 | 4.75 | [('topic_mismatch', 'Thiazide diuretics')] + ohdsi:1035 | 2.75 | [('topic_mismatch', 'Thiazide diuretics')] + cipher:30152 | -9.3875 | [('topic_mismatch', 'Hypertension')] + +CASE 41: Veteran patients with renal sclerosis + cipher:13656 | 28.1125 | [('topic_primary', 'Renal Sclerosis')] + cipher:17322 | 21.7125 | [('topic_primary', 'Renal Sclerosis')] + cipher:18902 | 9.6125 | [('topic_primary', 'Renal Failure')] + cipher:31257 | 8.6125 | [('topic_primary', 'Renal disease')] + cipher:30611 | 6.6125 | [('topic_primary', 'End-Stage Renal Disease')] + cipher:16003 | -3.8875 | [('topic_mismatch', 'Chronic Kidney Disease')] + ohdsi:964 | -11.19 | [('topic_mismatch', 'Chronic Kidney Disease')] + cipher:31686 | -11.3675 | [('topic_mismatch', 'Chronic Kidney Disease')] + +CASE 42: Veteran 
patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:13992 | 26.6125 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:17453 | 20.1925 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1725 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.23 | [('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + cipher:30630 | -12.8875 | [('topic_mismatch', 'Rheumatoid Arthritis')] + +CASE 43: Veteran patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:12888 | 35.6125 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune Hemolytic Anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:17112 | 18.6325 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + cipher:18439 | 16.3625 | [('topic_primary', 'Aplastic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aplastic Anemia"]}')] + cipher:2178 | 14.7792 | [('topic_primary', 'Autoimmune hemolytic anemias (MAP)')] + 
+CASE 44: Veteran patients with cardiac complications + ohdsi:1081 | -3.75 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30192 | -3.8875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:29218 | -3.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16294 | -9.8875 | [('topic_mismatch', 'Cardiovascular Disease Mortality')] + cipher:16189 | -9.8875 | [('topic_mismatch', 'COVID-19 Severity')] + cipher:16278 | -11.3275 | [('topic_mismatch', 'VA Administrative Data')] + cipher:30617 | -11.3675 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16275 | -11.3875 | [('topic_mismatch', 'Chronic Pulmonary Disease')] + +CASE 45: Patients diagnosed with fasciitis + cipher:15684 | 35.6725 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:2703 | 35.6325 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + cipher:14029 | 28.1125 | [('topic_primary', 'Fasciitis')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.75 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + +CASE 46: Patients with stomatitis or mucositis + cipher:3657 | 26.2575 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 14.3792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 14.3392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 13.8592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 13.8192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 13.7792 | [('topic_primary', 'Stomatitis and mucositis')] + 
cipher:30086 | -5.3875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -12.3875 | [('topic_mismatch', 'Open Wound')] + +CASE 47: Patients with Barretts esophagus + cipher:2187 | 35.6725 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:13531 | 35.6525 | [('topic_primary', "Barrett's Esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Esophagus"]}')] + cipher:15342 | 35.6125 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:30228 | 19.9625 | [('topic_primary', "Barrett's Oesophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Oesophagus"]}')] + ohdsi:447 | -3.73 | [('topic_mismatch', 'Esophagitis')] + ohdsi:525 | -3.75 | [('topic_mismatch', 'Gastroesophageal Reflux Disease')] + ohdsi:446 | -3.75 | [('topic_mismatch', 'Eosinophilic esophagitis')] + ohdsi:500 | -3.75 | [('topic_mismatch', 'Gastritis')] + +CASE 48: Patients with regional enteritis + cipher:3534 | 35.6725 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + cipher:13571 | 28.1125 | [('topic_primary', 'Regional Enteritis')] + cipher:15376 | 21.1125 | [('topic_primary', 'Regional Enteritis')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:501 | -3.75 | [('topic_mismatch', "Crohn's disease")] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + +CASE 49: Patients with primary localized osteoarthritis + cipher:3192 | 34.1525 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + 
cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9425 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1325 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.75 | [('topic_primary', 'Osteoarthritis')] + +CASE 50: Patients with aortic valve disease + cipher:30301 | 24.6958 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nonrheumatic aortic valve disorders"]}')] + cipher:17250 | 20.6125 | [('topic_primary', 'Aortic Valve Disease')] + cipher:2131 | 20.6125 | [('topic_primary', 'Aortic valve disease')] + cipher:31315 | 5.9458 | [('topic_primary', 'Valvular Disease')] + ohdsi:1172 | 0.83 | [('topic_primary', 'Heart valve disorder')] + ohdsi:1103 | -0.9833 | [('topic_primary', 'Cardiac Valve Surgery'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Cardiac Valve Surgery"]}')] + ohdsi:876 | -4.19 | [('topic_primary', 'Cardiac Valve Surgery')] + ohdsi:1315 | -4.21 | [('topic_primary', 'Cardiac Valve Surgery')] + +CASE 51: Patients with chronic periodontitis + cipher:2371 | 28.1525 | 
[('topic_primary', 'Chronic Periodontitis')] + cipher:13494 | 28.1125 | [('topic_primary', 'Chronic Periodontitis')] + cipher:3397 | 25.1925 | [('topic_primary', 'Periodontitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Periodontitis"]}')] + cipher:15317 | 20.6325 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13492 | 20.1725 | [('topic_primary', 'Periodontitis')] + cipher:15315 | 12.6125 | [('topic_primary', 'Periodontitis')] + cipher:29206 | -3.8875 | [('topic_mismatch', 'Peritonsillar Abscess')] + cipher:16074 | -10.8875 | [('topic_mismatch', 'Dental Caries Risk')] + +CASE 52: Patients with hypertensive chronic kidney disease + ohdsi:923 | 30.5 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + cipher:2846 | 28.1125 | [('topic_primary', 'Hypertensive Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertensive Chronic Kidney Disease"]}')] + ohdsi:1191 | 19.08 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + ohdsi:964 | 16.85 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31686 | 16.6525 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31287 | 14.1325 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31697 | 7.8725 | [('topic_primary', 'Hypertensive Heart and Renal Disease')] + ohdsi:41 | 4.75 | [('topic_primary', 'Chronic Kidney Disease')] + +CASE 53: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo 
cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + cipher:31280 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + +CASE 54: Patients with scleritis or episcleritis + cipher:30069 | 28.7125 | [('topic_primary', 'Scleritis and Episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and Episcleritis"]}')] + cipher:3581 | 28.6125 | [('topic_primary', 'Scleritis and episcleritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Scleritis and episcleritis"]}')] + ohdsi:1226 | -3.71 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.73 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:1223 | -3.75 | [('topic_mismatch', 'Uveitis')] + ohdsi:620 | -3.75 | [('topic_mismatch', 'Uveitis')] + cipher:30247 | -3.8875 | [('topic_mismatch', 'Uveitis')] + cipher:15886 | -3.8875 | [('topic_mismatch', 'Anterior Uveitis')] + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + cipher:12820 | 15.8325 | [('topic_primary', 'Carbohydrate Transport and Metabolism Disorders')] + cipher:12818 | 14.8058 | [('topic_primary', 'Disorders of Carbohydrate Transport and Metabolism')] + cipher:3256 | 14.3125 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:17097 | 14.2525 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:14837 | 14.2125 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:2597 | 9.6411 | 
[('topic_primary', 'Protein Plasma-Amino-Acid Transport and Metabolism')] + cipher:2573 | 8.4258 | [('topic_primary', 'Carbohydrate Metabolism Disorders')] + cipher:2616 | -0.0208 | [('topic_primary', 'Amino-acid transport')] + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + ohdsi:1187 | 27.35 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1158 | -0.69 | [('topic_mismatch', 'Aspirin Exposure')] + ohdsi:719 | -9.75 | [('topic_mismatch', 'Hepatic Injury')] + cipher:18919 | -13.3875 | [('topic_mismatch', 'Serious Adverse Events')] + ohdsi:735 | -15.75 | [('topic_mismatch', 'Acute Liver Injury')] + ohdsi:293 | -15.75 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:18446 | -15.8075 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:31254 | -16.8875 | [('topic_mismatch', 'Liver Disease')] + +CASE 57: Patients diagnosed with dyschromia and vitiligo + cipher:13900 | 35.6725 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + cipher:2628 | 35.6325 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + ohdsi:471 | 21.85 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:30727 | 21.6925 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:13901 | 19.6125 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:15590 | 18.1525 | [('topic_primary', 'Dyschromia and Vitiligo')] + cipher:13902 | 13.4458 | [('topic_primary', 'Other Dyschromia')] + cipher:15592 | 5.9458 | [('topic_primary', 'Other dyschromia')] + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + 
ohdsi:716 | 28.125 | [('topic_primary', 'Acute Hepatic Injury'), ('topic_context', '{"context_conditions": ["Hepatic Failure"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:735 | 26.56 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:294 | 26.08 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure", "Viral Hepatitis", "Alcoholic Liver Disease"], "target_conditions": [... [truncated 22 chars]')] + ohdsi:293 | 25.25 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18447 | 25.2125 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18446 | 25.1525 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:719 | 22.1667 | [('topic_primary', 'Hepatic Injury'), ('topic_context', '{"context_conditions": ["Jaundice", "Liver Disease"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:736 | 19.52 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + cipher:3108 | 35.6725 | [('topic_primary', 'Nerve Plexus Lesions'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nerve Plexus Lesions"]}')] + cipher:13084 | 26.2125 | [('topic_primary', 'Nerve Plexus Lesions')] + cipher:13085 | 25.1125 | [('topic_primary', 'Nerve Root Lesions'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nerve Root Lesions"]}')] + cipher:14974 | 20.6925 | [('topic_primary', 'Nerve Plexus Lesions')] + 
cipher:13083 | 18.0192 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + cipher:14975 | 13.1125 | [('topic_primary', 'Nerve Root Lesions')] + cipher:14973 | 10.4992 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + cipher:3109 | 10.4792 | [('topic_primary', 'Nerve root and plexus disorders')] + +CASE 60: patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:543 | -3.75 | [('topic_mismatch', 'Seizure')] + +CASE 61: patients with chronic ulcerative colitis + ohdsi:860 | 35.83 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + ohdsi:458 | 35.79 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + cipher:30724 | 35.6125 | [('topic_primary', 'Ulcerative Colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative Colitis"]}')] + cipher:3770 | 32.1125 | [('topic_primary', 'Ulcerative colitis (chronic)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis (chronic)"]}')] + ohdsi:201 | 31.435 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": ["Rectal hemorrhage", "Inflammatory bowel disease", "Complications"], "target_conditions": ["Ulce... 
[truncated 17 chars]')] + ohdsi:775 | 4.0278 | [('topic_context', '{"context_conditions": ["First IBD Occurrence", "Chronic Ulcerative Proctitis"], "target_conditions": ["Inflammatory Bow... [truncated 13 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1067 | 0.5833 | [('topic_context', '{"context_conditions": ["Ulcerative colitis", "Rectal hemorrhage"], "target_conditions": ["JAK inhibitors"]}'), ('context_without_primary', 'topic only matched context fields')] + cipher:4126 | -3.8875 | [('topic_mismatch', 'Inflammatory Bowel Disease')] + +CASE 62: Veteran patients with developmental disorders that are pervasive + cipher:3415 | 32.1725 | [('topic_primary', 'Pervasive Developmental Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pervasive Developmental Disorders"]}')] + cipher:12996 | 25.4458 | [('topic_primary', 'Pervasive Developmental Disorders')] + cipher:17197 | 17.6525 | [('topic_primary', 'Developmental Delays and Disorders')] + cipher:17193 | 16.5458 | [('topic_primary', 'Pervasive Developmental Disorders')] + cipher:18933 | 15.7792 | [('topic_primary', 'Mental Health Disorders')] + cipher:17138 | 7.6125 | [('topic_primary', 'Other Persistent Mental Disorders')] + cipher:30605 | -3.8075 | [('topic_mismatch', 'Autism Spectrum')] + cipher:30166 | -3.8675 | [('topic_mismatch', 'Down Syndrome')] + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + ohdsi:510 | 34.29 | [('topic_primary', 'Acute myocardial infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + ohdsi:1081 | 34.27 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:18982 | 34.1925 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial 
Infarction"]}')] + cipher:31590 | 34.1125 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:31275 | 34.1125 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + ohdsi:881 | 29.0167 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + cipher:3998 | 27.1725 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + cipher:30748 | 27.1125 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + ohdsi:632 | 19.35 | [('topic_primary', 'Antiphospholipid syndrome')] + ohdsi:781 | 19.33 | [('topic_primary', 'Antiphospholipid Syndrome')] + ohdsi:738 | 2.77 | [('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:683 | -5.25 | [('topic_mismatch', 'Antineutrophil cytoplasmic antibody positive vasculitis')] + cipher:31306 | -5.3875 | [('topic_mismatch', 'Atrial Fibrillation')] + cipher:16293 | -12.3875 | [('topic_mismatch', 'Stroke Rehospitalization')] + cipher:31284 | -12.8875 | [('topic_mismatch', 'Thrombophilia')] + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + ohdsi:864 | 21.75 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:651 | 21.75 | [('topic_primary', 'Dementia'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31120 | 21.7125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31241 | 21.6125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:30112 | 21.6125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31141 | 21.6125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:33 | 17.5833 | [('topic_primary', 'Dementia')] + cipher:31123 | 3.2558 | [('topic_context', '{"context_conditions": ["Insomnia Symptoms", "Dementia Cases"], "target_conditions": ["Specific Dementias"]}'), ('context_without_primary', 'topic only matched context fields')] + +CASE 66: patients who experienced a GI bleed adverse event + ohdsi:888 | 31.75 | [('topic_primary', 'Gastrointestinal Bleeding'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Gastrointestinal Bleeding"]}')] + ohdsi:417 | 29.77 | [('topic_primary', 'Gastrointestinal Bleeding'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Gastrointestinal Bleeding"]}')] + ohdsi:349 | 24.29 | [('topic_primary', 'Gastrointestinal Bleeding')] + ohdsi:77 | 24.25 | [('topic_primary', 'Gastrointestinal Bleeding')] + cipher:4285 | 21.1125 | [('topic_primary', 'Bleeding'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Bleeding"]}')] + ohdsi:1197 | 20.25 | [('topic_primary', 'Gastrointestinal Bleeding')] + ohdsi:299 | 18.31 | [('topic_primary', 'Gastrointestinal Bleeding and Perforation')] + ohdsi:57 | 16.25 | [('topic_primary', 'Bleeding')] + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + ohdsi:678 | 34.35 | [('topic_primary', 'COVID-19'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["COVID-19"]}')] + cipher:31308 | 30.5975 | [('topic_primary', 'Confirmed COVID-19'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Confirmed COVID-19"]}')] + cipher:31276 | 24.4458 | [('topic_primary', 'COVID-19 Infection'), ('topic_context', '{"context_conditions": ["COVID-19"], "target_conditions": []}')] + ohdsi:47 | 18.75 | [('topic_primary', 'COVID-19 diagnosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COVID-19 diagnosis"]}')] + ohdsi:59 | 17.9967 | [('topic_primary', 'COVID-19 Diagnosis'), ('topic_context', '{"context_conditions": ["SARS-CoV-2 test"], "target_conditions": ["COVID-19 diagnosis"]}')] + ohdsi:44 | 17.1233 | [('topic_primary', 'COVID-19 Infection')] + cipher:16189 | 16.9458 | [('topic_primary', 'COVID-19 Severity')] + ohdsi:346 | -1.75 | [('topic_mismatch', 'Outpatient Visit')] + +CASE 68: veterans who experienced an abdominal aortic aneurysm + cipher:29240 | 33.6125 | [('topic_primary', 'Abdominal Aortic Aneurysm (AAA)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Abdominal Aortic Aneurysm"]}')] + cipher:29169 | 26.1125 | [('topic_primary', 'Abdominal Aortic Aneurysm (AAA)')] + ohdsi:1093 | 12.0 | [('topic_primary', 'Abdominal Aortic Aneurysm Repair'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Abdominal Aortic Aneurysm Repair"]}')] + ohdsi:866 | -2.8367 | [('topic_primary', 'Aortic Repair')] + ohdsi:1290 | -2.8767 | [('topic_primary', 'Aortic Repair')] + ohdsi:1314 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:1291 | -20.25 | [('topic_mismatch', 'Bypass Surgery')] + ohdsi:1102 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 69: patients with COPD according to diagnostic codes in the EHR + cipher:29794 | 35.7125 | [('topic_primary', 'COPD'), ('topic_context', '{"context_conditions": [], "target_conditions": 
["COPD"]}')] + cipher:4241 | 35.6125 | [('topic_primary', 'COPD'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COPD"]}')] + cipher:29756 | 21.7725 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:31297 | 21.7525 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + ohdsi:1192 | 17.87 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:16274 | 14.2125 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:29553 | -3.8875 | [('topic_mismatch', 'Sleep Apnea')] + cipher:29755 | -3.8875 | [('topic_mismatch', 'Asthma')] + +CASE 70: patients hospitalized at least once for heart failure + cipher:16152 | 37.6125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:934 | 36.9567 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": ["Hypertension"], "target_conditions": ["Heart Failure"]}')] + ohdsi:1303 | 34.33 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:30106 | 34.1125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:16291 | 30.1725 | [('topic_primary', 'Heart Failure Rehospitalization'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure Rehospitalization"]}')] + ohdsi:979 | 27.35 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:938 | 26.75 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:68 | 25.75 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + +CASE 71: patients who appear to 
have diabetes based on a medication-based phenotype + cipher:30803 | 30.2525 | [('topic_primary', 'Meglitinide Use in Type 2 Diabetes')] + cipher:31250 | 25.1925 | [('topic_primary', 'Diabetes')] + cipher:31195 | 25.1125 | [('topic_primary', 'Diabetes')] + cipher:30170 | 24.9625 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Type 1 Diabetes", "Type 2 Diabetes"]}')] + cipher:16207 | 24.8625 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Diabetes Mellitus"]}')] + cipher:3760 | 19.3375 | [('topic_primary', 'Type 1 diabetes with ophthalmic manifestations'), ('topic_context', '{"context_conditions": ["ophthalmic manifestations"], "target_conditions": ["Type 1 diabetes"]}')] + cipher:16277 | 16.6125 | [('topic_primary', 'Diabetes')] + cipher:30759 | 16.3625 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Diabetes Mellitus"]}')] + diff --git a/docs/evaluation/phenotype_recommendations/testing-weighted_1.0_0.0.txt b/docs/evaluation/phenotype_recommendations/testing-weighted_1.0_0.0.txt new file mode 100644 index 0000000..0b2b14f --- /dev/null +++ b/docs/evaluation/phenotype_recommendations/testing-weighted_1.0_0.0.txt @@ -0,0 +1,2310 @@ +INFO: creating output file /tmp/phenotype_recommendation_tests.json +INFO: Cardiac defibrillator in situ (MAP) +INFO: Fasciitis (gwPheWAS) +INFO: Acute prostatitis (MAP) +INFO: [P] Esophagectomy +INFO: [P][R] Peripheral neuritis +INFO: [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap +INFO: [P][R] Allergic rhinitis +INFO: Ischemic Heart Disease (Sandhu) +INFO: Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) +INFO: [P] Lung Resection +INFO: [P] Laryngitis +INFO: Regional Enteritis (Phecode) +INFO: Renal Sclerosis NOS (VADC) +INFO: Other cardiomyopathy (MAP) +INFO: [P] Posterior reversible encephalopathy 
syndrome PRES +INFO: [P] Anorexia Nervosa +INFO: [P] Dizziness or giddiness including motion sickness and vertigo +INFO: Polymyalgia Rheumatica (VADC) +INFO: Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) +INFO: [P][R] Low blood pressure +INFO: [P] Encephalopathy +INFO: [P] Birdshot chorioretinitis +INFO: Macular Degeneration (Senile) of Retina Nos (Phecode) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: [P] Primary adenocarcinoma of rectum MSI-L +INFO: Blister (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Neurofibromatosis type 1 (FP) +INFO: Keloid scar (gwPheWAS) +INFO: [P] acetaminophen exposure 10 +INFO: [P] Antibiotics Rifamycins 10 +INFO: Joint/ligament sprain (gwPheWAS) +INFO: Miscarriage; stillbirth (MAP) +INFO: Arterial embolism and thrombosis of lower extremity artery (MAP) +INFO: [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection +INFO: [P] Hospitalization with preinfarction syndrome +INFO: Personal history of diseases of blood and blood-forming organs (MAP) +INFO: Other Benign Pancreatic Conditions (Nguyen) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: [P] New users of dihydropyridine calcium channel blockers +INFO: Renal Sclerosis NOS (VADC) +INFO: Polymyalgia Rheumatica (VADC) +INFO: Autoimmune Hemolytic Anemias (VADC) +INFO: Cardiac Complications Not Elsewhere Classified (VADC) +INFO: Fasciitis (gwPheWAS) +INFO: Stomatitis and mucositis (gwPheWAS) +INFO: Barrett's esophagus (gwPheWAS) +INFO: Regional Enteritis (Phecode) +INFO: Osteoarthrosis Localized Primary (Phecode) +INFO: Aortic Valve Disease (Phecode) +INFO: Chronic Periodontitis (Phecode) +INFO: Hypertensive chronic kidney disease (MAP) +INFO: Other cardiomyopathy (MAP) +INFO: Scleritis and episcleritis (MAP) +INFO: Other disorders of carbohydrate transport and metabolism (MAP) +INFO: [P] acetaminophen exposure 10 +INFO: Dyschromia and Vitiligo +INFO: Acute Hepatic Injury with no pre-existing liver disease 
+INFO: Nerve Plexus Lesions +INFO: Posterior reversible encephalopathy syndrome PRES +INFO: Ulcerative colitis (chronic) +INFO: Pervasive Developmental Disorders +INFO: Acute myocardial infarction +INFO: Antiphospholipid syndrome +INFO: dementia in older adults +INFO: GI bleeding adverse event outcome +INFO: running COVID outpatient diagnosis cohort +INFO: running abdominal aortic aneurysm in veterans +INFO: COPD phenotype using diagnosis codes +INFO: heart failure hospitalization cohort +INFO: diabetes medication-based phenotype +INFO: Tests completed. File written. +RESULTS SUMMARY: +count 71 +CASE 1: Patients with an implanted cardiac defibrillator + shortlist: ['cipher:30773'] + rec_ids: ['cipher:30773'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30773'] + final_deterministic: {'selected_ids': ['cipher:30773'], 'matched_llm_ids': ['cipher:30773'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 2: Patients diagnosed with fasciitis + shortlist: ['ohdsi:1075', 'cipher:15684'] + rec_ids: ['ohdsi:1075', 'cipher:15684'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1075', 'cipher:15684'] + final_deterministic: {'selected_ids': ['ohdsi:1075', 'cipher:15684'], 'matched_llm_ids': ['cipher:15684'], 'defaulted_ids': ['ohdsi:1075'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 3: Patients with acute prostatitis + shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:15499'] + rec_ids: ['ohdsi:283', 'cipher:13720', 'cipher:15499'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + 
duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:283', 'cipher:13720', 'cipher:15499'] + final_deterministic: {'selected_ids': ['ohdsi:283', 'cipher:13720', 'cipher:15499'], 'matched_llm_ids': ['ohdsi:283', 'cipher:13720'], 'defaulted_ids': ['cipher:15499'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 4: Patients who underwent esophagectomy + shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + rec_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + final_deterministic: {'selected_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870'], 'matched_llm_ids': ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 5: Patients diagnosed with peripheral neuritis + shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + rec_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: ['ohdsi:540'] + dedupe_backfilled_ids: [] + dedupe_applied: True + enforced_shortlist_ids: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + final_deterministic: {'selected_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'matched_llm_ids': ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + rec_ids: 
['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + final_deterministic: {'selected_ids': ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759'], 'matched_llm_ids': ['ohdsi:1042', 'ohdsi:759'], 'defaulted_ids': ['ohdsi:760'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 7: Patients with allergic rhinitis + shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + rec_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + final_deterministic: {'selected_ids': ['ohdsi:508', 'ohdsi:367', 'cipher:2081'], 'matched_llm_ids': ['ohdsi:508', 'ohdsi:367'], 'defaulted_ids': ['cipher:2081'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 8: Patients with ischemic heart disease + shortlist: ['ohdsi:654', 'cipher:16261', 'ohdsi:532', 'cipher:29218'] + rec_ids: ['ohdsi:654', 'cipher:16261', 'ohdsi:532'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:654', 'cipher:16261', 'ohdsi:532', 'cipher:29218'] + final_deterministic: {'selected_ids': ['ohdsi:654', 'cipher:16261', 'ohdsi:532'], 'matched_llm_ids': ['ohdsi:654', 'cipher:16261'], 'defaulted_ids': ['ohdsi:532'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 9: Pregnant patients with hemorrhage in early 
pregnancy or threatened labor + shortlist: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + rec_ids: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + final_deterministic: {'selected_ids': ['cipher:2643', 'cipher:13824', 'cipher:2798'], 'matched_llm_ids': ['cipher:2643', 'cipher:13824'], 'defaulted_ids': ['cipher:2798'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 10: Patients who underwent lung resection + shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + rec_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + final_deterministic: {'selected_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'matched_llm_ids': ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 11: Patients with laryngitis + shortlist: ['ohdsi:355'] + rec_ids: ['ohdsi:355'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:355'] + final_deterministic: {'selected_ids': ['ohdsi:355'], 'matched_llm_ids': ['ohdsi:355'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 12: Patients with regional enteritis + shortlist: ['ohdsi:884', 'cipher:3534'] + rec_ids: ['ohdsi:884', 'cipher:3534'] + replaced_ids: [] + 
blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:884', 'cipher:3534'] + final_deterministic: {'selected_ids': ['ohdsi:884', 'cipher:3534'], 'matched_llm_ids': ['ohdsi:884', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 13: Patients with renal sclerosis + shortlist: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'ohdsi:481'] + rec_ids: ['cipher:13646', 'cipher:13656', 'ohdsi:1003'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'ohdsi:481'] + final_deterministic: {'selected_ids': ['cipher:13646', 'cipher:13656', 'ohdsi:1003'], 'matched_llm_ids': ['cipher:13646', 'cipher:13656', 'ohdsi:1003'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 14: Patients with cardiomyopathy + shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30155', 'cipher:30192'] + rec_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30155'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31252', 'ohdsi:679', 'cipher:30155', 'cipher:30192'] + final_deterministic: {'selected_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30155'], 'matched_llm_ids': ['cipher:31252', 'ohdsi:679', 'cipher:30155'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 15: Patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + 
blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 16: Patients with anorexia nervosa + shortlist: ['ohdsi:1340', 'cipher:17187', 'ohdsi:1339'] + rec_ids: ['ohdsi:1340', 'cipher:17187', 'ohdsi:1339'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1340', 'cipher:17187', 'ohdsi:1339'] + final_deterministic: {'selected_ids': ['ohdsi:1340', 'cipher:17187', 'ohdsi:1339'], 'matched_llm_ids': ['ohdsi:1340'], 'defaulted_ids': ['cipher:17187', 'ohdsi:1339'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 17: Patients with dizziness, vertigo, or motion sickness + shortlist: ['ohdsi:893', 'ohdsi:244', 'cipher:13215'] + rec_ids: ['ohdsi:893', 'ohdsi:244', 'cipher:13215'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:893', 'ohdsi:244', 'cipher:13215'] + final_deterministic: {'selected_ids': ['ohdsi:893', 'ohdsi:244', 'cipher:13215'], 'matched_llm_ids': ['ohdsi:893', 'ohdsi:244', 'cipher:13215'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 18: Patients with polymyalgia rheumatica + shortlist: ['ohdsi:670', 'cipher:30277'] + rec_ids: ['ohdsi:670', 'cipher:30277'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:670', 'cipher:30277'] + final_deterministic: {'selected_ids': ['ohdsi:670', 'cipher:30277'], 'matched_llm_ids': ['cipher:30277'], 'defaulted_ids': ['ohdsi:670'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + shortlist: ['cipher:2064', 'cipher:14303', 'cipher:2821'] + rec_ids: ['cipher:2064', 'cipher:14303', 'cipher:2821'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:2064', 'cipher:14303', 'cipher:2821'] + final_deterministic: {'selected_ids': ['cipher:2064', 'cipher:14303', 'cipher:2821'], 'matched_llm_ids': ['cipher:14303', 'cipher:2821'], 'defaulted_ids': ['cipher:2064'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 20: Patients with low blood pressure + shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + rec_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + replaced_ids: [] + blocked_pool_ids: ['ohdsi:997'] + blocked_candidate_reasons: {'ohdsi:997': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + final_deterministic: {'selected_ids': ['ohdsi:339', 'ohdsi:890', 'cipher:13390'], 'matched_llm_ids': ['ohdsi:339', 'ohdsi:890'], 'defaulted_ids': ['cipher:13390'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 21: Patients with encephalopathy + shortlist: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'] + rec_ids: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: 
{} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'], 'matched_llm_ids': ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 22: Patients with birdshot chorioretinitis + shortlist: ['ohdsi:1223'] + rec_ids: ['ohdsi:1223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1223'] + final_deterministic: {'selected_ids': ['ohdsi:1223'], 'matched_llm_ids': ['ohdsi:1223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 23: Older adults with macular degeneration + shortlist: ['cipher:30295', 'cipher:3006', 'cipher:2505', 'cipher:16256'] + rec_ids: ['cipher:30295', 'cipher:3006', 'cipher:2505'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30295', 'cipher:3006', 'cipher:2505', 'cipher:16256'] + final_deterministic: {'selected_ids': ['cipher:30295', 'cipher:3006', 'cipher:2505'], 'matched_llm_ids': ['cipher:30295', 'cipher:3006', 'cipher:2505'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 24: Patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: 
False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:738', 'cipher:18441'], 'defaulted_ids': ['ohdsi:1018'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 25: Patients with MSI-low rectal adenocarcinoma + shortlist: [] + rec_ids: [] + replaced_ids: ['ohdsi:820', 'ohdsi:822', 'ohdsi:823'] + blocked_pool_ids: ['ohdsi:823', 'ohdsi:820', 'ohdsi:836', 'ohdsi:822', 'ohdsi:819'] + blocked_candidate_reasons: {'ohdsi:823': 'procedure_for_diagnosis_intent', 'ohdsi:820': 'procedure_for_diagnosis_intent', 'ohdsi:836': 'procedure_for_diagnosis_intent', 'ohdsi:822': 'procedure_for_diagnosis_intent', 'ohdsi:819': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: [] + final_deterministic: {'selected_ids': [], 'matched_llm_ids': [], 'defaulted_ids': [], 'invalid_llm_ids': ['ohdsi:1234', 'ohdsi:2636', 'ohdsi:33'], 'duplicate_llm_ids': [], 'used_llm_justification_count': 0, 'used_default_justification_count': 0} + +CASE 26: Patients with blistering skin lesions + shortlist: ['ohdsi:652', 'ohdsi:376'] + rec_ids: ['ohdsi:652', 'ohdsi:376'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:652', 'ohdsi:376'] + final_deterministic: {'selected_ids': ['ohdsi:652', 'ohdsi:376'], 'matched_llm_ids': ['ohdsi:652', 'ohdsi:376'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 27: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + replaced_ids: [] + 
blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 28: Patients with neurofibromatosis type 1 + shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + rec_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + final_deterministic: {'selected_ids': ['ohdsi:697', 'ohdsi:304', 'ohdsi:305'], 'matched_llm_ids': ['ohdsi:697', 'ohdsi:304'], 'defaulted_ids': ['ohdsi:305'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 29: Patients with keloid scars + shortlist: ['cipher:15610'] + rec_ids: ['cipher:15610'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15610'] + final_deterministic: {'selected_ids': ['cipher:15610'], 'matched_llm_ids': ['cipher:15610'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 30: Patients with acetaminophen exposure + shortlist: ['ohdsi:1187', 'ohdsi:1158'] + rec_ids: ['ohdsi:1187', 'ohdsi:1158'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + 
enforced_shortlist_ids: ['ohdsi:1187', 'ohdsi:1158'] + final_deterministic: {'selected_ids': ['ohdsi:1187', 'ohdsi:1158'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': ['ohdsi:1158'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 31: Patients exposed to rifamycin antibiotics + shortlist: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + rec_ids: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + final_deterministic: {'selected_ids': ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'], 'matched_llm_ids': ['ohdsi:1211'], 'defaulted_ids': ['ohdsi:1207', 'ohdsi:1203'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 32: Patients with a joint or ligament sprain + shortlist: ['ohdsi:363', 'ohdsi:452', 'cipher:14236'] + rec_ids: ['ohdsi:363', 'ohdsi:452', 'cipher:14236'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:363', 'ohdsi:452', 'cipher:14236'] + final_deterministic: {'selected_ids': ['ohdsi:363', 'ohdsi:452', 'cipher:14236'], 'matched_llm_ids': ['ohdsi:363', 'ohdsi:452', 'cipher:14236'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 33: Pregnant patients with miscarriage or stillbirth + shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + rec_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + 
enforced_shortlist_ids: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + final_deterministic: {'selected_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'matched_llm_ids': ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + shortlist: ['cipher:13354', 'cipher:31817'] + rec_ids: ['cipher:13354', 'cipher:31817'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13354', 'cipher:31817'] + final_deterministic: {'selected_ids': ['cipher:13354', 'cipher:31817'], 'matched_llm_ids': ['cipher:13354'], 'defaulted_ids': ['cipher:31817'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + shortlist: ['ohdsi:1061', 'cipher:31223', 'ohdsi:1186'] + rec_ids: ['ohdsi:1061', 'cipher:31223', 'ohdsi:1186'] + replaced_ids: ['ohdsi:861'] + blocked_pool_ids: ['ohdsi:861'] + blocked_candidate_reasons: {'ohdsi:861': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1061', 'cipher:31223', 'ohdsi:1186'] + final_deterministic: {'selected_ids': ['ohdsi:1061', 'cipher:31223', 'ohdsi:1186'], 'matched_llm_ids': ['ohdsi:1061', 'cipher:31223'], 'defaulted_ids': ['ohdsi:1186'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 36: Patients hospitalized with preinfarction syndrome + shortlist: ['ohdsi:939'] + rec_ids: ['ohdsi:939'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:939'] + final_deterministic: {'selected_ids': ['ohdsi:939'], 'matched_llm_ids': ['ohdsi:939'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + shortlist: ['ohdsi:738', 'cipher:3412'] + rec_ids: ['ohdsi:738', 'cipher:3412'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:738', 'cipher:3412'] + final_deterministic: {'selected_ids': ['ohdsi:738', 'cipher:3412'], 'matched_llm_ids': ['ohdsi:738'], 'defaulted_ids': ['cipher:3412'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 38: Patients with benign pancreatic conditions + shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + rec_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + final_deterministic: {'selected_ids': ['cipher:16954', 'cipher:16952', 'cipher:16953'], 'matched_llm_ids': ['cipher:16952', 'cipher:16953'], 'defaulted_ids': ['cipher:16954'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 39: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + 
dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 40: New users of dihydropyridine calcium channel blockers + shortlist: ['ohdsi:1049', 'ohdsi:1052', 'ohdsi:1047'] + rec_ids: ['ohdsi:1049', 'ohdsi:1052', 'ohdsi:1047'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1049', 'ohdsi:1052', 'ohdsi:1047'] + final_deterministic: {'selected_ids': ['ohdsi:1049', 'ohdsi:1052', 'ohdsi:1047'], 'matched_llm_ids': ['ohdsi:1047'], 'defaulted_ids': ['ohdsi:1049', 'ohdsi:1052'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 41: Veteran patients with renal sclerosis + shortlist: ['cipher:17322', 'cipher:18902', 'cipher:31257', 'cipher:30611'] + rec_ids: ['cipher:17322', 'cipher:18902', 'cipher:31257'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17322', 'cipher:18902', 'cipher:31257', 'cipher:30611'] + final_deterministic: {'selected_ids': ['cipher:17322', 'cipher:18902', 'cipher:31257'], 'matched_llm_ids': ['cipher:17322', 'cipher:18902', 'cipher:31257'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 42: Veteran patients with polymyalgia rheumatica + shortlist: ['cipher:30277', 'cipher:17453', 'cipher:3460'] + rec_ids: ['cipher:30277', 
'cipher:17453', 'cipher:3460'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30277', 'cipher:17453', 'cipher:3460'] + final_deterministic: {'selected_ids': ['cipher:30277', 'cipher:17453', 'cipher:3460'], 'matched_llm_ids': ['cipher:30277', 'cipher:17453'], 'defaulted_ids': ['cipher:3460'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 43: Veteran patients with autoimmune hemolytic anemia + shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + rec_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + final_deterministic: {'selected_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'matched_llm_ids': ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 44: Veteran patients with cardiac complications + shortlist: ['ohdsi:1081'] + rec_ids: ['ohdsi:1081'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1081'] + final_deterministic: {'selected_ids': ['ohdsi:1081'], 'matched_llm_ids': ['ohdsi:1081'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 45: Patients diagnosed with fasciitis + shortlist: ['ohdsi:1075', 'cipher:15684'] + rec_ids: ['ohdsi:1075', 'cipher:15684'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + 
duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1075', 'cipher:15684'] + final_deterministic: {'selected_ids': ['ohdsi:1075', 'cipher:15684'], 'matched_llm_ids': ['cipher:15684'], 'defaulted_ids': ['ohdsi:1075'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 1} + +CASE 46: Patients with stomatitis or mucositis + shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + rec_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + final_deterministic: {'selected_ids': ['cipher:17298', 'cipher:15333', 'cipher:3657'], 'matched_llm_ids': ['cipher:17298', 'cipher:15333'], 'defaulted_ids': ['cipher:3657'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 47: Patients with Barretts esophagus + shortlist: ['cipher:15342', 'cipher:30228', 'cipher:2187'] + rec_ids: ['cipher:15342', 'cipher:30228', 'cipher:2187'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:15342', 'cipher:30228', 'cipher:2187'] + final_deterministic: {'selected_ids': ['cipher:15342', 'cipher:30228', 'cipher:2187'], 'matched_llm_ids': ['cipher:15342', 'cipher:30228'], 'defaulted_ids': ['cipher:2187'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 48: Patients with regional enteritis + shortlist: ['ohdsi:884', 'cipher:3534'] + rec_ids: ['ohdsi:884', 'cipher:3534'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + 
duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:884', 'cipher:3534'] + final_deterministic: {'selected_ids': ['ohdsi:884', 'cipher:3534'], 'matched_llm_ids': ['ohdsi:884', 'cipher:3534'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 49: Patients with primary localized osteoarthritis + shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + rec_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + final_deterministic: {'selected_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'matched_llm_ids': ['cipher:3192', 'cipher:4399', 'cipher:3190'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 50: Patients with aortic valve disease + shortlist: ['ohdsi:1172', 'cipher:30301', 'cipher:31315'] + rec_ids: ['ohdsi:1172', 'cipher:30301', 'cipher:31315'] + replaced_ids: ['ohdsi:1103'] + blocked_pool_ids: ['ohdsi:1103', 'ohdsi:876'] + blocked_candidate_reasons: {'ohdsi:1103': 'procedure_for_diagnosis_intent', 'ohdsi:876': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1172', 'cipher:30301', 'cipher:31315'] + final_deterministic: {'selected_ids': ['ohdsi:1172', 'cipher:30301', 'cipher:31315'], 'matched_llm_ids': ['ohdsi:1172'], 'defaulted_ids': ['cipher:30301', 'cipher:31315'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 2} + +CASE 51: Patients with chronic periodontitis + shortlist: 
['cipher:13494', 'cipher:3397', 'cipher:2371'] + rec_ids: ['cipher:13494', 'cipher:3397', 'cipher:2371'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13494', 'cipher:3397', 'cipher:2371'] + final_deterministic: {'selected_ids': ['cipher:13494', 'cipher:3397', 'cipher:2371'], 'matched_llm_ids': ['cipher:13494', 'cipher:2371'], 'defaulted_ids': ['cipher:3397'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 52: Patients with hypertensive chronic kidney disease + shortlist: ['ohdsi:1191', 'ohdsi:964', 'cipher:31686', 'cipher:30635'] + rec_ids: ['ohdsi:1191', 'ohdsi:964', 'cipher:31686'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1191', 'ohdsi:964', 'cipher:31686', 'cipher:30635'] + final_deterministic: {'selected_ids': ['ohdsi:1191', 'ohdsi:964', 'cipher:31686'], 'matched_llm_ids': ['ohdsi:1191', 'ohdsi:964', 'cipher:31686'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 53: Patients with cardiomyopathy + shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + rec_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + final_deterministic: {'selected_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'matched_llm_ids': ['cipher:30192', 'cipher:31252', 'ohdsi:679'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 
'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 54: Patients with scleritis or episcleritis + shortlist: ['cipher:30069'] + rec_ids: ['cipher:30069'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:30069'] + final_deterministic: {'selected_ids': ['cipher:30069'], 'matched_llm_ids': ['cipher:30069'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + rec_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + final_deterministic: {'selected_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'matched_llm_ids': ['cipher:12820', 'cipher:12818', 'cipher:3256'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + shortlist: ['ohdsi:1187'] + rec_ids: ['ohdsi:1187'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:1187'] + final_deterministic: {'selected_ids': ['ohdsi:1187'], 'matched_llm_ids': ['ohdsi:1187'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 57: Patients diagnosed with dyschromia and vitiligo + 
shortlist: ['cipher:13900', 'ohdsi:471', 'cipher:2628'] + rec_ids: ['cipher:13900', 'ohdsi:471', 'cipher:2628'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13900', 'ohdsi:471', 'cipher:2628'] + final_deterministic: {'selected_ids': ['cipher:13900', 'ohdsi:471', 'cipher:2628'], 'matched_llm_ids': ['cipher:13900', 'ohdsi:471'], 'defaulted_ids': ['cipher:2628'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + shortlist: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + rec_ids: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + final_deterministic: {'selected_ids': ['ohdsi:735', 'ohdsi:294', 'ohdsi:293'], 'matched_llm_ids': ['ohdsi:294', 'ohdsi:293'], 'defaulted_ids': ['ohdsi:735'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + shortlist: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + rec_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + final_deterministic: {'selected_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 'matched_llm_ids': ['cipher:13084', 'cipher:13085', 'cipher:14974'], 
'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 60: patients with a diagnosis of PRES + shortlist: ['ohdsi:223'] + rec_ids: ['ohdsi:223'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:223'] + final_deterministic: {'selected_ids': ['ohdsi:223'], 'matched_llm_ids': ['ohdsi:223'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 1, 'used_default_justification_count': 0} + +CASE 61: patients with chronic ulcerative colitis + shortlist: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + rec_ids: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + final_deterministic: {'selected_ids': ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'], 'matched_llm_ids': ['ohdsi:860', 'ohdsi:458', 'ohdsi:201'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 62: Veteran patients with developmental disorders that are pervasive + shortlist: ['cipher:17197', 'cipher:17193', 'cipher:18933', 'cipher:17141'] + rec_ids: ['cipher:17197', 'cipher:17193', 'cipher:18933'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:17197', 'cipher:17193', 'cipher:18933', 'cipher:17141'] + final_deterministic: {'selected_ids': ['cipher:17197', 'cipher:17193', 'cipher:18933'], 'matched_llm_ids': ['cipher:17197', 'cipher:17193'], 'defaulted_ids': ['cipher:18933'], 
'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982', 'cipher:31590'] + rec_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982', 'cipher:31590'] + final_deterministic: {'selected_ids': ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'], 'matched_llm_ids': ['ohdsi:510', 'ohdsi:1081', 'cipher:18982'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + shortlist: ['ohdsi:632', 'ohdsi:781'] + rec_ids: ['ohdsi:632', 'ohdsi:781'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:632', 'ohdsi:781'] + final_deterministic: {'selected_ids': ['ohdsi:632', 'ohdsi:781'], 'matched_llm_ids': ['ohdsi:632', 'ohdsi:781'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + shortlist: ['ohdsi:651', 'ohdsi:864', 'ohdsi:927', 'cipher:31120'] + rec_ids: ['ohdsi:651', 'ohdsi:864', 'ohdsi:927'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:651', 'ohdsi:864', 'ohdsi:927', 'cipher:31120'] + final_deterministic: {'selected_ids': 
['ohdsi:651', 'ohdsi:864', 'ohdsi:927'], 'matched_llm_ids': ['ohdsi:651', 'ohdsi:864', 'ohdsi:927'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 66: patients who experienced a GI bleed adverse event + shortlist: ['ohdsi:482', 'ohdsi:888', 'ohdsi:417', 'ohdsi:349'] + rec_ids: ['ohdsi:482', 'ohdsi:888', 'ohdsi:417'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:482', 'ohdsi:888', 'ohdsi:417', 'ohdsi:349'] + final_deterministic: {'selected_ids': ['ohdsi:482', 'ohdsi:888', 'ohdsi:417'], 'matched_llm_ids': ['ohdsi:482', 'ohdsi:888', 'ohdsi:417'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + shortlist: ['cipher:31308', 'ohdsi:678'] + rec_ids: ['cipher:31308', 'ohdsi:678'] + replaced_ids: ['ohdsi:47', 'ohdsi:59'] + blocked_pool_ids: ['ohdsi:47', 'ohdsi:59'] + blocked_candidate_reasons: {'ohdsi:47': 'withdrawn', 'ohdsi:59': 'withdrawn'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31308', 'ohdsi:678'] + final_deterministic: {'selected_ids': ['cipher:31308', 'ohdsi:678'], 'matched_llm_ids': ['cipher:31308', 'ohdsi:678'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 0} + +CASE 68: veterans who experienced an abdominal aortic aneurysm + shortlist: [] + rec_ids: [] + replaced_ids: ['ohdsi:1290', 'ohdsi:866'] + blocked_pool_ids: ['ohdsi:1290', 'ohdsi:866', 'ohdsi:1314', 'ohdsi:1291', 'ohdsi:1102'] + blocked_candidate_reasons: {'ohdsi:1290': 'procedure_for_diagnosis_intent', 'ohdsi:866': 
'procedure_for_diagnosis_intent', 'ohdsi:1314': 'procedure_for_diagnosis_intent', 'ohdsi:1291': 'procedure_for_diagnosis_intent', 'ohdsi:1102': 'procedure_for_diagnosis_intent'} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: [] + final_deterministic: {'selected_ids': [], 'matched_llm_ids': [], 'defaulted_ids': [], 'invalid_llm_ids': ['ohdsi:11348', 'ohdsi:2683', 'ohdsi:33'], 'duplicate_llm_ids': [], 'used_llm_justification_count': 0, 'used_default_justification_count': 0} + +CASE 69: patients with COPD according to diagnostic codes in the EHR + shortlist: ['cipher:29794', 'cipher:29756', 'cipher:31297', 'ohdsi:1192'] + rec_ids: ['cipher:29794', 'cipher:29756', 'cipher:31297'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:29794', 'cipher:29756', 'cipher:31297', 'ohdsi:1192'] + final_deterministic: {'selected_ids': ['cipher:29794', 'cipher:29756', 'cipher:31297'], 'matched_llm_ids': ['cipher:29794', 'cipher:29756', 'cipher:31297'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 70: patients hospitalized at least once for heart failure + shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + rec_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + final_deterministic: {'selected_ids': ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'], 'matched_llm_ids': ['ohdsi:934', 'ohdsi:1303'], 'defaulted_ids': ['cipher:16152'], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 2, 'used_default_justification_count': 1} + +CASE 71: patients who 
appear to have diabetes based on a medication-based phenotype + shortlist: ['cipher:31250', 'cipher:31195', 'cipher:30170', 'cipher:3760'] + rec_ids: ['cipher:31250', 'cipher:31195', 'cipher:30170'] + replaced_ids: [] + blocked_pool_ids: [] + blocked_candidate_reasons: {} + duplicate_topic_ids: [] + dedupe_backfilled_ids: [] + dedupe_applied: False + enforced_shortlist_ids: ['cipher:31250', 'cipher:31195', 'cipher:30170', 'cipher:3760'] + final_deterministic: {'selected_ids': ['cipher:31250', 'cipher:31195', 'cipher:30170'], 'matched_llm_ids': ['cipher:31250', 'cipher:31195', 'cipher:30170'], 'defaulted_ids': [], 'invalid_llm_ids': [], 'duplicate_llm_ids': [], 'used_llm_justification_count': 3, 'used_default_justification_count': 0} + +CASE 1: Patients with an implanted cardiac defibrillator + intent_facets_raw: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Device based diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients with implanted cardiac devices']} + intent_facets_effective: {'condition_or_topic': 'cardiac defibrillator', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients with implanted devices', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Device based diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients with implanted cardiac devices']} + planning_shortlist: ['cipher:30773'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac defibrillator as a diagnosis-oriented study intent.', 
'Included Trifascicular Block (Kuan) as a diagnosis candidate focused on Trifascicular Block.'] + recommendations: + cipher:30773 | Trifascicular Block (Kuan) | This phenotype identifies patients diagnosed with or hospitalized due to Trifascicular Block, based on ICD-10 codes and hospitalizations, aligning with the study intent of patients with an implanted c + +CASE 2: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1075', 'cipher:15684'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Narcolepsy as a diagnosis candidate focused on Narcolepsy.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + ohdsi:1075 | [P] FDA AESI Narcolepsy | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS, aligning with the study intent of patients diagnosed with + +CASE 3: Patients with acute prostatitis + intent_facets_raw: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['acute infection'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'prostatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'acute', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['acute infection'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:283', 'cipher:13720', 'cipher:15499'] + planning_reasoning: ['Selected shortlisted candidates align with prostatitis as a diagnosis-oriented study intent.', 'Included [P] Prostatitis as a diagnosis candidate focused on Prostatitis.', 'Included Acute Prostatitis (Phecode) as a diagnosis candidate focused on Acute Prostatitis.', 'Included Acute prostatitis (gwPheWAS) as a comorbidity covariate candidate focused on Acute Prostatitis.'] + recommendations: + ohdsi:283 | [P] Prostatitis | This phenotype directly addresses the study intent of identifying patients with prostatitis, including chronic prostatitis. + cipher:13720 | Acute Prostatitis (Phecode) | This phenotype represents a diagnosis for Acute Prostatitis based on ICD-9 and ICD-10 codes, aligning with the study's focus on acute prostatitis. 
+ cipher:15499 | Acute prostatitis (gwPheWAS) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 4: Patients who underwent esophagectomy + intent_facets_raw: {'condition_or_topic': 'esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'esophagectomy', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1097', 'ohdsi:1294', 'ohdsi:870', 'ohdsi:1309'] + planning_reasoning: ['Selected shortlisted candidates align with esophagectomy as a procedure-oriented study intent.', 'Included [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults as a procedure candidate focused on Esophagectomy.', 'Included [P] Esophagectomy, adults, inpt stay, no ED as a procedure candidate focused on Esophagectomy.'] + recommendations: + ohdsi:1097 | [P] Esophagectomy, adults, inpt stay, no ED, post op new Afib | This phenotype directly reflects the surgical procedure of esophagectomy and its association with postoperative atrial fibrillation, aligning with the study intent. + ohdsi:1294 | [P] Esophagectomy, adults | This phenotype represents the esophagectomy procedure, providing a broader capture of the surgical intervention. 
+ ohdsi:870 | [P] Esophagectomy, adults, inpt stay, no ED | This phenotype represents the surgical procedure of esophagectomy, typically performed in an inpatient setting and aligns with the study intent. + +CASE 5: Patients diagnosed with peripheral neuritis + intent_facets_raw: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'peripheral neuritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:388', 'ohdsi:389', 'ohdsi:238'] + planning_reasoning: ['Selected shortlisted candidates align with peripheral neuritis as a diagnosis-oriented study intent.', 'Included [P][R] Peripheral neuritis as a diagnosis candidate focused on Peripheral neuritis.', 'Included [P] Peripheral Neuropathy or Neuritits as a diagnosis candidate focused on Peripheral Neuropathy.', 'Included [P][R] Optic neuritis as a diagnosis candidate focused on Optic neuritis.', 'Near-duplicate topical variants were removed to preserve distinct recommendation coverage: ohdsi:540.'] + recommendations: + ohdsi:388 | [P][R] Peripheral neuritis | Directly reflects the study intent of patients diagnosed with peripheral neuritis. + ohdsi:389 | [P] Peripheral Neuropathy or Neuritits | Captures the first occurrence of peripheral neuritis or neuropathy, a related condition. 
+ ohdsi:238 | [P][R] Optic neuritis | Optical neuritis is related to peripheral nerve damage and aligns with the study intent. + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + intent_facets_raw: {'condition_or_topic': 'TNF-alpha inhibitors and IL-12/23 inhibitors', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication exposure'], 'care_setting_cues': ['any setting'], 'population_cues': ['concomitant use']} + intent_facets_effective: {'condition_or_topic': 'TNF-alpha inhibitors and IL-12/23 inhibitors', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'concomitant exposure', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication exposure'], 'care_setting_cues': ['any setting'], 'population_cues': ['concomitant use']} + planning_shortlist: ['ohdsi:760', 'ohdsi:1042', 'ohdsi:759', 'ohdsi:1040'] + planning_reasoning: ['Selected shortlisted candidates align with TNF-alpha inhibitors and IL-12/23 inhibitors as a medication based-oriented study intent.', 'Included [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on IL-23 Inhibitors.', 'Included [P] New users of IL-23 inhibitors as a medication based candidate focused on IL-23 inhibitors.', 'Included [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap as a medication based candidate focused on TNF-alpha Inhibitors, IL23 Inhibitors.'] + recommendations: + ohdsi:760 | [P] Concomitant IL 23 Inhibitors and IL12_23 Inhibitors - GE 30D overlap | Selected from the top reranked 
shortlisted candidates as a clinically aligned medication based match. + ohdsi:1042 | [P] New users of IL-23 inhibitors | Captures patients initiating IL-23 inhibitor therapy, which is relevant to understanding the timeline of inhibitor exposure. + ohdsi:759 | [P] Concomitant TNF - alpha Inhibitors and IL12_23 Inhibitors - GE 30D overlap | This phenotype directly addresses concomitant use of TNF-alpha and IL23 inhibitors, aligning with the study intent of examining multiple inhibitor exposures. + +CASE 7: Patients with allergic rhinitis + intent_facets_raw: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'allergic rhinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:508', 'ohdsi:367', 'cipher:2081', 'cipher:30258'] + planning_reasoning: ['Selected shortlisted candidates align with allergic rhinitis as a diagnosis-oriented study intent.', 'Included [P][R] Allergic rhinitis as a diagnosis candidate focused on Allergic rhinitis.', 'Included [P] Allergic Rhinitis as a diagnosis candidate focused on Allergic Rhinitis.', 'Included Allergic rhinitis (MAP) as a diagnosis candidate focused on Allergic Rhinitis.'] + recommendations: + ohdsi:508 | [P][R] Allergic rhinitis | This phenotype represents the diagnosis of Allergic rhinitis, primarily used for cohort definition, aligning directly with the study intent. 
+ ohdsi:367 | [P] Allergic Rhinitis | This phenotype represents events of Allergic Rhinitis. Primarily intended for diagnosing this condition. + cipher:2081 | Allergic rhinitis (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 8: Patients with ischemic heart disease + intent_facets_raw: {'condition_or_topic': 'Ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart disease'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'Ischemic heart disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Diagnosis of heart disease'], 'care_setting_cues': ['Hospital care', 'Clinic care'], 'population_cues': ['Adult patients']} + planning_shortlist: ['ohdsi:654', 'cipher:16261', 'ohdsi:532', 'cipher:29218'] + planning_reasoning: ['Selected shortlisted candidates align with Ischemic heart disease as a diagnosis-oriented study intent.', 'Included [P][R] Ischemic heart disease as a diagnosis candidate focused on Ischemic heart disease.', 'Included Ischemic Heart Disease (Sandhu) as a diagnosis candidate focused on Ischemic Heart Disease.', 'Included [P][R] Heart disease as a diagnosis candidate focused on Heart Disease.'] + recommendations: + ohdsi:654 | [P][R] Ischemic heart disease | This phenotype represents the diagnosis of Ischemic heart disease, aligning directly with the study intent of patients with ischemic heart disease. 
+ cipher:16261 | Ischemic Heart Disease (Sandhu) | This phenotype identifies patients with Ischemic Heart Disease based on ICD-10 diagnostic codes, a relevant definition for the study intent. + ohdsi:532 | [P][R] Heart disease | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + intent_facets_raw: {'condition_or_topic': 'Hemorrhage in early pregnancy, Threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Pregnancy status'], 'care_setting_cues': ['Any care setting'], 'population_cues': ['Pregnant patients']} + intent_facets_effective: {'condition_or_topic': 'Hemorrhage in early pregnancy, Threatened labor', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Pregnancy status'], 'care_setting_cues': ['Any care setting'], 'population_cues': ['Pregnant patients']} + planning_shortlist: ['cipher:2643', 'cipher:13824', 'cipher:2798'] + planning_reasoning: ['Selected shortlisted candidates align with Hemorrhage in early pregnancy, Threatened labor as a diagnosis-oriented study intent.', 'Included Early or threatened labor; hemorrhage in early pregnancy (MAP) as a complication candidate focused on Early or threatened labor; hemorrhage in early pregnancy.', 'Included Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) as a complication candidate focused on Early Labor Hemorrhage.', 'Included Hemorrhage in early pregnancy (MAP) as a diagnosis candidate focused on Hemorrhage in early 
pregnancy.'] + recommendations: + cipher:2643 | Early or threatened labor; hemorrhage in early pregnancy (MAP) | This phenotype directly addresses the study intent of 'Pregnant patients with hemorrhage in early pregnancy or threatened labor' and is defined using a MAP algorithm which aligns with the clinical top + cipher:13824 | Early or Threatened Labor Hemorrhage in Early Pregnancy (Phecode) | This phenotype also represents 'Early or Threatened Labor Hemorrhage in Early Pregnancy' and is based on Phecode mapping, a clinically relevant approach for identifying this condition. + cipher:2798 | Hemorrhage in early pregnancy (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 10: Patients who underwent lung resection + intent_facets_raw: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient|any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital patient'], 'population_cues': ['patients undergoing surgery']} + intent_facets_effective: {'condition_or_topic': 'lung resection', 'clinical_topic_aliases': [], 'phenotype_role': 'procedure', 'care_setting': 'inpatient|any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['surgical procedure'], 'care_setting_cues': ['hospital patient'], 'population_cues': ['patients undergoing surgery']} + planning_shortlist: ['ohdsi:1268', 'ohdsi:1308', 'ohdsi:869'] + planning_reasoning: ['Selected shortlisted candidates align with lung resection as a procedure-oriented study intent.', 'Included [P] Lung Resection, adults, inpt stay, no ED, post op new Afib as a procedure candidate focused 
on Lung Resection.', 'Included [P] Lung Resection, adults, post op new Afib as a procedure candidate focused on Lung Resection.', 'Included [P] Lung Resection, adults, inpt stay, no ED as a procedure candidate focused on Lung Resection.'] + recommendations: + ohdsi:1268 | [P] Lung Resection, adults, inpt stay, no ED, post op new Afib | This phenotype directly reflects the study intent of patients undergoing lung resection and experiencing postoperative atrial fibrillation. + ohdsi:1308 | [P] Lung Resection, adults, post op new Afib | This phenotype aligns with the study's focus on lung resection and post-operative atrial fibrillation. + ohdsi:869 | [P] Lung Resection, adults, inpt stay, no ED | This phenotype represents lung resection as a surgical procedure occurring in an inpatient setting, which is relevant to the study’s intention. + +CASE 11: Patients with laryngitis + intent_facets_raw: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'unknown', 'validation_preference': 'preferred', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Acute upper airway inflammation'], 'care_setting_cues': ['Outpatient clinic'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'laryngitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'unknown', 'validation_preference': 'preferred', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Acute upper airway inflammation'], 'care_setting_cues': ['Outpatient clinic'], 'population_cues': ['Patients']} + planning_shortlist: ['ohdsi:355'] + planning_reasoning: ['Selected shortlisted candidates align with laryngitis as a diagnosis-oriented study intent.', 'Included [P] Laryngitis as a diagnosis candidate focused on Laryngitis.'] + 
recommendations: + ohdsi:355 | [P] Laryngitis | This phenotype represents the diagnosis of Laryngitis, aligning directly with the study intent of patients with laryngitis. + +CASE 12: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:884', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included [P] Diarrhea including enteritis as a diagnosis candidate focused on Diarrhea.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + ohdsi:884 | [P] Diarrhea including enteritis | Diarrhea is a common symptom associated with regional enteritis and aligns with the study intent. + cipher:3534 | Regional enteritis (MAP) | Identifies patients with Regional enteritis based on MAP algorithm probability. 
+ +CASE 13: Patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['renal sclerosis'], 'care_setting_cues': ['inpatient care', 'outpatient care'], 'population_cues': ['adult patients']} + planning_shortlist: ['cipher:13646', 'cipher:13656', 'ohdsi:1003', 'ohdsi:481'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Nephritis Nephrosis Renal Sclerosis (Phecode) as a diagnosis candidate focused on Nephritis Nephrosis Renal Sclerosis.', 'Included Renal Sclerosis NOS (Phecode) as a diagnosis candidate focused on Renal Sclerosis.', 'Included [P] Renal cancer as a diagnosis candidate focused on Renal cancer.'] + recommendations: + cipher:13646 | Nephritis Nephrosis Renal Sclerosis (Phecode) | This phenotype directly addresses renal sclerosis as defined by the Phecode system, utilizing ICD-9 and ICD-10 codes. + cipher:13656 | Renal Sclerosis NOS (Phecode) | This phenotype also represents renal sclerosis based on ICD-9 and ICD-10 codes, providing an alternative representation. 
+ ohdsi:1003 | [P] Renal cancer | While primarily focused on renal cancer, this phenotype captures patients with a kidney condition, which aligns with the broader concept of renal sclerosis, especially given the potential co-occurrenc + +CASE 14: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Cardiac condition'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Adult patients']} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['Cardiac condition'], 'care_setting_cues': ['Hospital', 'Clinic'], 'population_cues': ['Adult patients']} + planning_shortlist: ['cipher:31252', 'ohdsi:679', 'cipher:30155', 'cipher:30192'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.', 'Included Hypertrophic Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Hypertrophic Cardiomyopathy (HCM).'] + recommendations: + cipher:31252 | Cardiomyopathy (Knight) | This phenotype is an HDR UK phenotype identifying patients with Cardiomyopathy based on ICD-10, SNOMED CT and Read Codes v2, aligning directly with the study intent. 
+ ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype directly addresses Takotsubo cardiomyopathy, aligning with the study intent of patients with cardiomyopathy. + cipher:30155 | Hypertrophic Cardiomyopathy (HDR UK) | This phenotype identifies patients with Hypertrophic Cardiomyopathy diagnosis, aligning with the study intent of patients with cardiomyopathy. + +CASE 15: Patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': ['Posterior Reversible Encephalopathy'], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': ['posterior reversible encephalopathy'], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Primary diagnosis'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | The study intent is for patients with PRES. This phenotype directly addresses the specified condition. 
+ +CASE 16: Patients with anorexia nervosa + intent_facets_raw: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient|ed|any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'anorexia nervosa', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient|inpatient|ed|any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1340', 'cipher:17187', 'ohdsi:1339'] + planning_reasoning: ['Selected shortlisted candidates align with anorexia nervosa as a diagnosis-oriented study intent.', 'Included [P] Anorexia Nervosa as a diagnosis candidate focused on Anorexia Nervosa.', 'Included Anorexia Nervosa (VADC) as a diagnosis candidate focused on Anorexia Nervosa.', 'Included [P] Bulimia Nervosa as a diagnosis candidate focused on Bulimia Nervosa.'] + recommendations: + ohdsi:1340 | [P] Anorexia Nervosa | This phenotype represents the diagnosis of Anorexia Nervosa as defined by events lasting 30 days, collapsed within 1 year, aligning directly with the study intent. + cipher:17187 | Anorexia Nervosa (VADC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + ohdsi:1339 | [P] Bulimia Nervosa | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 17: Patients with dizziness, vertigo, or motion sickness + intent_facets_raw: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dizziness, vertigo, motion sickness', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:893', 'ohdsi:244', 'cipher:13215'] + planning_reasoning: ['Selected shortlisted candidates align with dizziness, vertigo, motion sickness as a diagnosis-oriented study intent.', 'Included [P] Vertigo as a diagnosis candidate focused on Vertigo.', 'Included [P] Dizziness or giddiness including motion sickness and vertigo as a unknown candidate focused on Dizziness.', 'Included Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) as a diagnosis candidate focused on Dizziness and Giddiness (Lightheadedness and Vertigo).'] + recommendations: + ohdsi:893 | [P] Vertigo | This phenotype represents a condition characterized by dizziness and is directly related to the study intent. + ohdsi:244 | [P] Dizziness or giddiness including motion sickness and vertigo | This phenotype directly reflects the study intent (dizziness, vertigo, motion sickness) and represents symptoms of dizziness, including motion sickness and vertigo. 
+ cipher:13215 | Dizziness and Giddiness (Lightheadedness and Vertigo) (Phecode) | This phenotype represents a defined set of ICD-9 and ICD-10 codes related to dizziness and giddiness, aligning with the study intent. + +CASE 18: Patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['outpatient clinic'], 'population_cues': ['adult patients']} + planning_shortlist: ['ohdsi:670', 'cipher:30277'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included [P][R] Temporal arteritis as a diagnosis candidate focused on Temporal arteritis.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.'] + recommendations: + ohdsi:670 | [P][R] Temporal arteritis | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype directly addresses Polymyalgia Rheumatica based on ICD10 and hospitalization codes, aligning with the study intent. 
+ +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + intent_facets_raw: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Adverse effects related to medication'], 'care_setting_cues': ['Inpatient care', 'Outpatient care'], 'population_cues': ['Patients receiving corticosteroids']} + intent_facets_effective: {'condition_or_topic': 'corticosteroid adverse effects', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['Adverse effects related to medication'], 'care_setting_cues': ['Inpatient care', 'Outpatient care'], 'population_cues': ['Patients receiving corticosteroids']} + planning_shortlist: ['cipher:2064', 'cipher:14303', 'cipher:2821'] + planning_reasoning: ['Selected shortlisted candidates align with corticosteroid adverse effects as a diagnosis-oriented study intent.', 'Included Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) as a complication candidate focused on Adrenal Cortical Steroids Adverse Effects.', 'Included Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) as a complication candidate focused on Adrenal Steroid Adverse Effects.', 'Included Hormones and synthetic substitutes causing adverse effects in therapeutic use (MAP) as a comorbidity covariate candidate focused on Hormones and Synthetic Substitutes Adverse Effects.'] + recommendations: + cipher:2064 | Adrenal cortical steroids causing adverse effects in therapeutic use (MAP) | Selected from the top reranked 
shortlisted candidates as a clinically aligned complication match. + cipher:14303 | Adrenal Cortical Steroids Causing Adverse Effects in Therapeutic Use (Phecode) | This phenotype is directly related to adverse effects from therapeutic use of adrenal corticosteroids, aligning with the study intent. + cipher:2821 | Hormones and synthetic substitutes causing adverse effects in therapeutic use (MAP) | This phenotype, also based on MAP clustering, identifies patients with potential adverse effects from hormonal medications, which includes corticosteroids. + +CASE 20: Patients with low blood pressure + intent_facets_raw: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Hypotension', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:339', 'ohdsi:890', 'cipher:13390'] + planning_reasoning: ['Selected shortlisted candidates align with Hypotension as a diagnosis-oriented study intent.', 'Included [P][R] Low blood pressure as a severity candidate focused on Hypotension.', 'Included [P] Hypotension as a severity candidate focused on Hypotension.', 'Included Hypotension Nos (Phecode) as a diagnosis candidate focused on Hypotension.'] + recommendations: + ohdsi:339 | [P][R] Low blood pressure | This phenotype directly addresses the study intent of patients with low blood pressure, capturing severity measures. 
+ ohdsi:890 | [P] Hypotension | This phenotype represents a measure of hypotension, aligning with the study intent and providing a broader scope for patient identification. + cipher:13390 | Hypotension Nos (Phecode) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 21: Patients with encephalopathy + intent_facets_raw: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['hospitalized patients', 'clinic visits'], 'population_cues': ['patients with encephalopathy']} + intent_facets_effective: {'condition_or_topic': 'encephalopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['primary diagnosis'], 'care_setting_cues': ['hospitalized patients', 'clinic visits'], 'population_cues': ['patients with encephalopathy']} + planning_shortlist: ['ohdsi:194', 'ohdsi:331', 'ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with encephalopathy as a diagnosis-oriented study intent.', 'Included [P] Encephalopathy or its presentations as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Encephalopathy as a diagnosis candidate focused on Encephalopathy.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:194 | [P] Encephalopathy or its presentations | This phenotype directly addresses the study intent of patients with 
encephalopathy, encompassing various presentations and underlying causes. + ohdsi:331 | [P] Encephalopathy | This phenotype represents the clinical diagnosis of Encephalopathy, aligning with the study's focus on encephalopathy. + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | This phenotype represents diagnosis of Posterior reversible encephalopathy syndrome (PRES), a specific condition related to encephalopathy and fits the study intent. + +CASE 22: Patients with birdshot chorioretinitis + intent_facets_raw: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['ocular inflammation diagnosis'], 'care_setting_cues': ['ophthalmology clinic'], 'population_cues': ['patients with vision loss']} + intent_facets_effective: {'condition_or_topic': 'birdshot chorioretinitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'ophthalmology patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['ocular inflammation diagnosis'], 'care_setting_cues': ['ophthalmology clinic'], 'population_cues': ['patients with vision loss']} + planning_shortlist: ['ohdsi:1223'] + planning_reasoning: ['Selected shortlisted candidates align with birdshot chorioretinitis as a diagnosis-oriented study intent.', 'Included [P] Birdshot chorioretinitis as a diagnosis candidate focused on Uveitis.'] + recommendations: + ohdsi:1223 | [P] Birdshot chorioretinitis | This phenotype directly addresses the study intent of identifying patients with birdshot chorioretinitis, a form of uveitis. 
+ +CASE 23: Older adults with macular degeneration + intent_facets_raw: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['macular degeneration is a diagnosed condition'], 'care_setting_cues': ['outpatient care for eye conditions'], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'macular degeneration', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'older adults', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['macular degeneration is a diagnosed condition'], 'care_setting_cues': ['outpatient care for eye conditions'], 'population_cues': ['older adults']} + planning_shortlist: ['cipher:30295', 'cipher:3006', 'cipher:2505', 'cipher:16256'] + planning_reasoning: ['Selected shortlisted candidates align with macular degeneration as a diagnosis-oriented study intent.', 'Included Macular Degeneration (HDR UK) as a diagnosis candidate focused on Macular Degeneration.', 'Included Macular degeneration (senile) of retina NOS (MAP) as a diagnosis candidate focused on Macular Degeneration.', 'Included Degeneration of macula and posterior pole of retina (MAP) as a diagnosis candidate focused on Macular Degeneration.'] + recommendations: + cipher:30295 | Macular Degeneration (HDR UK) | This phenotype directly addresses the study intent of older adults with macular degeneration, using ICD10 codes, Med Codes, and Read codes v2 to identify patients with a diagnosis or history of the co + cipher:3006 | Macular degeneration (senile) of retina NOS (MAP) | This phenotype identifies patients with macular degeneration based on a 
MAP phenotype, derived from ICD codes, aligning with the study intent. + cipher:2505 | Degeneration of macula and posterior pole of retina (MAP) | This phenotype identifies patients with macular degeneration, defined through ICD codes and a MAP phenotype algorithm, corresponding to the study’s focus. + +CASE 24: Patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly identifies patients with Autoimmune hemolytic anemia, aligning with the study intent. + cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype is another representation of Autoimmune hemolytic anemia, sourced from the OHDSI Phenotype Library and aligns with the study intent. + +CASE 25: Patients with MSI-low rectal adenocarcinoma + intent_facets_raw: {'condition_or_topic': 'colorectal cancer', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary cancer diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + intent_facets_effective: {'condition_or_topic': 'colorectal cancer', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary cancer diagnosis'], 'care_setting_cues': ['hospital', 'clinic'], 'population_cues': ['adult patients']} + planning_shortlist: [] + planning_reasoning: ['Selected shortlisted candidates align with colorectal cancer as a diagnosis-oriented study intent.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:820, ohdsi:822, ohdsi:823.'] + recommendations: + +CASE 26: Patients with blistering skin lesions + intent_facets_raw: {'condition_or_topic': 'blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 
'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'blistering skin lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:652', 'ohdsi:376'] + planning_reasoning: ['Selected shortlisted candidates align with blistering skin lesions as a diagnosis-oriented study intent.', 'Included [P][R] Vasculitis of the skin as a diagnosis candidate focused on Vasculitis of the skin.', 'Included [P][R] Bleeding skin as a outcome candidate focused on Bleeding Skin.'] + recommendations: + ohdsi:652 | [P][R] Vasculitis of the skin | This phenotype represents events of Vasculitis of the skin, a condition associated with blistering skin lesions. + ohdsi:376 | [P][R] Bleeding skin | Bleeding skin can be associated with blistering skin lesions and represents a relevant outcome. 
+ +CASE 27: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype directly addresses the study intent of patients with stomatitis or mucositis. + cipher:15333 | Stomatitis and mucositis (gwPheWAS) | This phenotype also represents stomatitis and mucositis, aligning with the study intent. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 28: Patients with neurofibromatosis type 1 + intent_facets_raw: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'neurofibromatosis type 1', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:697', 'ohdsi:304', 'ohdsi:305', 'ohdsi:696'] + planning_reasoning: ['Selected shortlisted candidates align with neurofibromatosis type 1 as a diagnosis-oriented study intent.', 'Included [P][R] Neurofibromatosis type 1 as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.', 'Included Neurofibromatosis type 1 without Type 2 (FP) as a diagnosis candidate focused on Neurofibromatosis type 1.'] + recommendations: + ohdsi:697 | [P][R] Neurofibromatosis type 1 | This phenotype represents the diagnosis of Neurofibromatosis type 1 (NF1), aligning directly with the study intent of patients with this condition. + ohdsi:304 | Neurofibromatosis type 1 (FP) | This phenotype offers an alternative diagnosis representation for Neurofibromatosis type 1 (NF1). + ohdsi:305 | Neurofibromatosis type 1 without Type 2 (FP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 29: Patients with keloid scars + intent_facets_raw: {'condition_or_topic': 'keloid scars', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'skin conditions', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'keloid scars', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'skin conditions', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:15610'] + planning_reasoning: ['Selected shortlisted candidates align with keloid scars as a diagnosis-oriented study intent.', 'Included Keloid scar (gwPheWAS) as a comorbidity covariate candidate focused on Keloid Scar.'] + recommendations: + cipher:15610 | Keloid scar (gwPheWAS) | This phenotype definition was used in the Million Veteran Program phenome-wide GWAS study, directly addressing the study intent of patients with keloid scars. 
+ +CASE 30: Patients with acetaminophen exposure + intent_facets_raw: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': ['APAP exposure'], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['all settings'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'acetaminophen exposure', 'clinical_topic_aliases': ['apap exposure'], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['all settings'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:1187', 'ohdsi:1158'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen exposure as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.', 'Included [P] Aspirin 10 as a medication based candidate focused on Aspirin Exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype directly addresses the study intent: ‘Patients with acetaminophen exposure’. It captures exposure to acetaminophen with a 30-day persistence window, aligning perfectly with the specifie + ohdsi:1158 | [P] Aspirin 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. 
+ +CASE 31: Patients exposed to rifamycin antibiotics + intent_facets_raw: {'condition_or_topic': 'Rifamycin antibiotics', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['antibiotic medication'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients receiving treatment']} + intent_facets_effective: {'condition_or_topic': 'Rifamycin antibiotics', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['antibiotic medication'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients receiving treatment']} + planning_shortlist: ['ohdsi:1211', 'ohdsi:1207', 'ohdsi:1203'] + planning_reasoning: ['Selected shortlisted candidates align with Rifamycin antibiotics as a medication based-oriented study intent.', 'Included [P] Antibiotics Rifamycins 10 as a medication based candidate focused on Rifamycins.', 'Included [P] Antibiotics Monobactams 10 as a medication based candidate focused on Antibiotics - Monobactams.', 'Included [P] Antibiotics Cephalosporins 10 as a medication based candidate focused on Antibiotics Cephalosporins.'] + recommendations: + ohdsi:1211 | [P] Antibiotics Rifamycins 10 | This phenotype directly addresses the study intent of patients exposed to rifamycin antibiotics, focusing on rifamycin drug exposure and persistence. + ohdsi:1207 | [P] Antibiotics Monobactams 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. 
+ ohdsi:1203 | [P] Antibiotics Cephalosporins 10 | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + +CASE 32: Patients with a joint or ligament sprain + intent_facets_raw: {'condition_or_topic': 'joint sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Sprain'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Patients']} + intent_facets_effective: {'condition_or_topic': 'joint sprain', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Sprain'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Patients']} + planning_shortlist: ['ohdsi:363', 'ohdsi:452', 'cipher:14236'] + planning_reasoning: ['Selected shortlisted candidates align with joint sprain as a diagnosis-oriented study intent.', 'Included [P][R] Joint stiffness as a diagnosis candidate focused on Joint stiffness.', 'Included [P][R] Joint pain as a outcome candidate focused on Joint pain.', 'Included Joint Ligament Sprain (Phecode) as a diagnosis candidate focused on Joint Ligament Sprain.'] + recommendations: + ohdsi:363 | [P][R] Joint stiffness | Represents Joint stiffness, a condition primarily identified by the presence of Joint stiffness, aligning with the study intent of patients with a joint or ligament sprain. + ohdsi:452 | [P][R] Joint pain | Represents events of joint pain, primarily an outcome measure, relevant to the study intent of patients with a joint or ligament sprain. 
+ cipher:14236 | Joint Ligament Sprain (Phecode) | Phenotype representing Joint Ligament Sprain defined using Phecode mapping; primarily intended for diagnosis based on ICD codes, relevant to the study intent. + +CASE 33: Pregnant patients with miscarriage or stillbirth + intent_facets_raw: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy']} + intent_facets_effective: {'condition_or_topic': 'miscarriage|stillbirth', 'clinical_topic_aliases': [], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'pregnant patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['pregnancy']} + planning_shortlist: ['ohdsi:627', 'ohdsi:1432', 'cipher:3056'] + planning_reasoning: ['Selected shortlisted candidates align with miscarriage|stillbirth as a outcome-oriented study intent.', 'Included [P][R] Miscarriage as a outcome candidate focused on Miscarriage.', 'Included [P] Still birth as a outcome candidate focused on Stillbirth.', 'Included Miscarriage; stillbirth (MAP) as a outcome candidate focused on Miscarriage; Stillbirth.'] + recommendations: + ohdsi:627 | [P][R] Miscarriage | This phenotype directly represents the event of miscarriage, aligning with the study intent of pregnant patients with miscarriage or stillbirth. + ohdsi:1432 | [P] Still birth | This phenotype represents stillbirth, a related outcome, consistent with the study's focus on miscarriage and stillbirth in pregnant patients. 
+ cipher:3056 | Miscarriage; stillbirth (MAP) | This phenotype identifies patients with a confirmed diagnosis of miscarriage or stillbirth derived from MAP unsupervised clustering, providing another option for identifying this condition. + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + intent_facets_raw: {'condition_or_topic': 'arterial embolism or thrombosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Lower extremity arteries'], 'care_setting_cues': ['Inpatient', 'Outpatient'], 'population_cues': ['Lower extremity']} + intent_facets_effective: {'condition_or_topic': 'arterial embolism or thrombosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'lower extremity', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Lower extremity arteries'], 'care_setting_cues': ['Inpatient', 'Outpatient'], 'population_cues': ['Lower extremity']} + planning_shortlist: ['cipher:13354', 'cipher:31817'] + planning_reasoning: ['Selected shortlisted candidates align with arterial embolism or thrombosis as a diagnosis-oriented study intent.', 'Included Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) as a complication candidate focused on Arterial Embolism and Thrombosis.', 'Included Embolism or Thrombosis of Other Arteries (Allara) as a complication candidate focused on Embolism or Thrombosis.'] + recommendations: + cipher:13354 | Arterial Embolism and Thrombosis of Lower Extremity Artery (Phecode) | Directly aligns with the study intent: Patients with arterial embolism or thrombosis of a lower extremity artery. 
+ cipher:31817 | Embolism or Thrombosis of Other Arteries (Allara) | Selected from the top reranked shortlisted candidates as a clinically aligned complication match. + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + intent_facets_raw: {'condition_or_topic': 'Urinary Tract Infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'Urinary Tract Infection', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1061', 'cipher:31223', 'ohdsi:1186'] + planning_reasoning: ['Selected shortlisted candidates align with Urinary Tract Infection as a diagnosis-oriented study intent.', 'Included [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection as a medication based candidate focused on Cephalosporin Exposure.', 'Included Urinary Tract Infection (Palin) as a diagnosis candidate focused on Urinary Tract Infection.', 'Included [P] Urinary tract infectious 10 as a diagnosis candidate focused on Urinary Tract Infection.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:861.'] + recommendations: + ohdsi:1061 | [P] New users of Cephalosporin systemetic nested in Urinary Tract Infection | This phenotype directly addresses the study intent of new cephalosporin users with urinary tract infections. 
+ cipher:31223 | Urinary Tract Infection (Palin) | Identifies patients with suspected or confirmed urinary tract infections, a core element of the study intent. + ohdsi:1186 | [P] Urinary tract infectious 10 | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 36: Patients hospitalized with preinfarction syndrome + intent_facets_raw: {'condition_or_topic': 'preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['hospitalized'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'preinfarction syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['hospitalized'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:939'] + planning_reasoning: ['Selected shortlisted candidates align with preinfarction syndrome as a diagnosis-oriented study intent.', 'Included [P] Hospitalization with preinfarction syndrome as a outcome candidate focused on Hospitalization.'] + recommendations: + ohdsi:939 | [P] Hospitalization with preinfarction syndrome | This phenotype directly represents the study intent of patients hospitalized with preinfarction syndrome, capturing inpatient visit occurrences. 
+ +CASE 37: Patients with a personal history of blood or blood-forming organ disease + intent_facets_raw: {'condition_or_topic': 'blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'blood or blood-forming organ disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients with personal history', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:738', 'cipher:3412'] + planning_reasoning: ['Selected shortlisted candidates align with blood or blood-forming organ disease as a diagnosis-oriented study intent.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis candidate focused on Autoimmune hemolytic anemia.', 'Included Personal history of diseases of blood and blood-forming organs (MAP) as a comorbidity covariate candidate focused on Personal history of diseases of blood and blood-forming organs.'] + recommendations: + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype directly addresses the study intent of patients with a personal history of blood or blood-forming organ disease, specifically focusing on Autoimmune hemolytic anemia. + cipher:3412 | Personal history of diseases of blood and blood-forming organs (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. 
+ +CASE 38: Patients with benign pancreatic conditions + intent_facets_raw: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'benign', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'pancreatic conditions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'benign', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:16954', 'cipher:16952', 'cipher:16953', 'cipher:16955'] + planning_reasoning: ['Selected shortlisted candidates align with pancreatic conditions as a diagnosis-oriented study intent.', 'Included Pancreas Transplant Recipient (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Conditions.', 'Included Extrahepatic Cholangiocarcinoma (Nguyen) as a diagnosis candidate focused on Pancreatic Cancer.', 'Included Chronic Pancreatitis (Nguyen) as a comorbidity covariate candidate focused on Pancreatic Inflammation.'] + recommendations: + cipher:16954 | Pancreas Transplant Recipient (Nguyen) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + cipher:16952 | Extrahepatic Cholangiocarcinoma (Nguyen) | Identifies patients with pancreatic cancer based on ICD-10 codes, aligning with the study intent of investigating prevalence in veterans. + cipher:16953 | Chronic Pancreatitis (Nguyen) | Identifies veterans with pancreatic inflammation, a relevant covariate for studying pancreatic cancer prevalence. 
+ +CASE 39: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Osteoarthritis; localized (MAP) as a diagnosis candidate focused on Osteoarthritis localized.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype identifies patients with osteoarthritis based on a MAP algorithm probability cutoff of 0.41, aligning directly with the study intent of patients with primary localized osteoarthritis. 
+ cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype identifies patients with primary osteoarthritis, specifically focusing on finger OA, which could be relevant for investigating genetic markers associated with the condition, supporting + cipher:3190 | Osteoarthritis; localized (MAP) | This phenotype represents localized osteoarthritis based on MAP unsupervised clustering of ICD codes, primarily used for diagnosis and aligns with the study intent. + +CASE 40: New users of dihydropyridine calcium channel blockers + intent_facets_raw: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['new users']} + intent_facets_effective: {'condition_or_topic': 'dihydropyridine calcium channel blockers', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'new users', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication'], 'care_setting_cues': ['any'], 'population_cues': ['new users']} + planning_shortlist: ['ohdsi:1049', 'ohdsi:1052', 'ohdsi:1047'] + planning_reasoning: ['Selected shortlisted candidates align with dihydropyridine calcium channel blockers as a medication based-oriented study intent.', 'Included [P] New users of Beta blockers nested in essential hypertension as a medication based candidate focused on Beta Blockers.', 'Included [P] New users of Beta blockers nested in Acute Myocardial Infarction as a medication based candidate focused on Beta Blockers.', 'Included [P] New users of dihydropyridine calcium channel blockers nested in essential 
hypertension as a medication based candidate focused on dihydropyridine calcium channel blockers.'] + recommendations: + ohdsi:1049 | [P] New users of Beta blockers nested in essential hypertension | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1052 | [P] New users of Beta blockers nested in Acute Myocardial Infarction | Selected from the top reranked shortlisted candidates as a clinically aligned medication based match. + ohdsi:1047 | [P] New users of dihydropyridine calcium channel blockers nested in essential hypertension | This phenotype directly addresses the study intent by identifying patients newly prescribed dihydropyridine calcium channel blockers for hypertension, aligning with the specified clinical topic. + +CASE 41: Veteran patients with renal sclerosis + intent_facets_raw: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'renal sclerosis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:17322', 'cipher:18902', 'cipher:31257', 'cipher:30611'] + planning_reasoning: ['Selected shortlisted candidates align with renal sclerosis as a diagnosis-oriented study intent.', 'Included Renal Sclerosis NOS (VADC) as a comorbidity covariate candidate focused on Renal Sclerosis.', 'Included Renal failure (PERC) as a comorbidity covariate 
candidate focused on Renal Failure.', 'Included Renal disease - Elixhauser Primary Care (Metcalfe) as a comorbidity covariate candidate focused on Renal disease.'] + recommendations: + cipher:17322 | Renal Sclerosis NOS (VADC) | This phenotype directly addresses renal sclerosis based on ICD codes, aligning with the study intent of Veteran patients with renal sclerosis. + cipher:18902 | Renal failure (PERC) | Identifies moderate and severe renal failure, a closely related condition to renal sclerosis, suitable as a comorbidity covariate. + cipher:31257 | Renal disease - Elixhauser Primary Care (Metcalfe) | Captures renal disease based on primary care Read codes, relevant to understanding renal issues in the Veteran population. + +CASE 42: Veteran patients with polymyalgia rheumatica + intent_facets_raw: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Patient'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'polymyalgia rheumatica', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'Veteran', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': ['Patient'], 'care_setting_cues': ['Outpatient care'], 'population_cues': ['Veterans']} + planning_shortlist: ['cipher:30277', 'cipher:17453', 'cipher:3460'] + planning_reasoning: ['Selected shortlisted candidates align with polymyalgia rheumatica as a diagnosis-oriented study intent.', 'Included Polymyalgia Rheumatica (HDR UK) as a diagnosis candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (VADC) as a comorbidity 
covariate candidate focused on Polymyalgia Rheumatica.', 'Included Polymyalgia Rheumatica (MAP) as a comorbidity covariate candidate focused on Polymyalgia Rheumatica.'] + recommendations: + cipher:30277 | Polymyalgia Rheumatica (HDR UK) | This phenotype is based on ICD-10 codes and is directly relevant to the study intent of identifying patients with polymyalgia rheumatica. + cipher:17453 | Polymyalgia Rheumatica (VADC) | This phenotype, derived from the VA Data Commons, represents a comorbidity/covariate related to polymyalgia rheumatica and aligns with the study’s focus. + cipher:3460 | Polymyalgia Rheumatica (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 43: Veteran patients with autoimmune hemolytic anemia + intent_facets_raw: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'autoimmune hemolytic anemia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['ohdsi:1018', 'ohdsi:738', 'cipher:18441'] + planning_reasoning: ['Selected shortlisted candidates align with autoimmune hemolytic anemia as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) as a diagnosis candidate focused on Autoimmune Hemolytic Anemia.', 'Included [D] Autoimmune hemolytic anemia as a diagnosis 
candidate focused on Autoimmune hemolytic anemia.', 'Included Autoimmune hemolytic anemia (OHDSI) as a diagnosis candidate focused on Autoimmune hemolytic anemia.'] + recommendations: + ohdsi:1018 | [P] Earliest event of Warm Autoimmune Hemolytic Anemia (wAIHA) | This phenotype directly addresses the study intent of identifying the earliest diagnosis of Warm Autoimmune Hemolytic Anemia in veteran patients. + ohdsi:738 | [D] Autoimmune hemolytic anemia | This phenotype captures the diagnosis of Autoimmune hemolytic anemia, a relevant condition for the study. + cipher:18441 | Autoimmune hemolytic anemia (OHDSI) | This phenotype is a well-established OHDSI phenotype for Autoimmune hemolytic anemia, providing a comprehensive representation of the condition. + +CASE 44: Veteran patients with cardiac complications + intent_facets_raw: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + intent_facets_effective: {'condition_or_topic': 'cardiac complications', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['Veterans']} + planning_shortlist: ['ohdsi:1081'] + planning_reasoning: ['Selected shortlisted candidates align with cardiac complications as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its 
complications | This phenotype directly addresses acute myocardial infarction, a critical diagnosis in cardiology and aligns with the study intent of 'Veteran patients with cardiac complications'. + +CASE 45: Patients diagnosed with fasciitis + intent_facets_raw: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'fasciitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1075', 'cipher:15684'] + planning_reasoning: ['Selected shortlisted candidates align with fasciitis as a diagnosis-oriented study intent.', 'Included [P] FDA AESI Narcolepsy as a diagnosis candidate focused on Narcolepsy.', 'Included Fasciitis (gwPheWAS) as a diagnosis candidate focused on Fasciitis.'] + recommendations: + ohdsi:1075 | [P] FDA AESI Narcolepsy | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:15684 | Fasciitis (gwPheWAS) | This phenotype definition represents a diagnosis of Fasciitis, based on ICD codes used in the Million Veteran Program (MVP) phenome-wide GWAS, aligning with the study intent. 
+ +CASE 46: Patients with stomatitis or mucositis + intent_facets_raw: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': ['Mucositis', 'Oral Mucositis', 'Oral Stomatitis'], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Oral inflammation', 'Mucosal inflammation'], 'care_setting_cues': ['Dental care', 'Oral care'], 'population_cues': ['Patients with oral conditions', 'Patients with mucosal conditions']} + intent_facets_effective: {'condition_or_topic': 'Stomatitis', 'clinical_topic_aliases': ['mucositis', 'oral mucositis', 'oral stomatitis'], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': ['Oral inflammation', 'Mucosal inflammation'], 'care_setting_cues': ['Dental care', 'Oral care'], 'population_cues': ['Patients with oral conditions', 'Patients with mucosal conditions']} + planning_shortlist: ['cipher:17298', 'cipher:15333', 'cipher:3657'] + planning_reasoning: ['Selected shortlisted candidates align with Stomatitis as a diagnosis-oriented study intent.', 'Included Stomatitis and Mucositis (Ulcerative) (VADC) as a complication candidate focused on Stomatitis and Mucositis.', 'Included Stomatitis and mucositis (gwPheWAS) as a complication candidate focused on Stomatitis and mucositis.', 'Included Stomatitis and mucositis (ulcerative) (MAP) as a diagnosis candidate focused on Stomatitis and mucositis (ulcerative).'] + recommendations: + cipher:17298 | Stomatitis and Mucositis (Ulcerative) (VADC) | This phenotype directly aligns with the study intent: patients with stomatitis or mucositis. It's a well-defined VA phenotype. 
+ cipher:15333 | Stomatitis and mucositis (gwPheWAS) | Another phenotype representing stomatitis and mucositis, relevant to the study intent. + cipher:3657 | Stomatitis and mucositis (ulcerative) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 47: Patients with Barretts esophagus + intent_facets_raw: {'condition_or_topic': "Barrett's esophagus", 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': "Barrett's esophagus", 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:15342', 'cipher:30228', 'cipher:2187'] + planning_reasoning: ["Selected shortlisted candidates align with Barrett's esophagus as a diagnosis-oriented study intent.", "Included Barrett's esophagus (gwPheWAS) as a diagnosis candidate focused on Barrett's esophagus.", "Included Barrett's Oesophagus (HDR UK) as a diagnosis candidate focused on Barrett's Oesophagus.", "Included Barrett's esophagus (MAP) as a diagnosis candidate focused on Barrett's esophagus."] + recommendations: + cipher:15342 | Barrett's esophagus (gwPheWAS) | This phenotype definition was used in the Million Veteran Program phenome-wide GWAS and directly addresses the study intent of patients with Barrett's esophagus. 
+ cipher:30228 | Barrett's Oesophagus (HDR UK) | This phenotype definition represents Barrett's Oesophagus based on ICD10 codes, Med Codes, and Read codes and aligns with the study intent. + cipher:2187 | Barrett's esophagus (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 48: Patients with regional enteritis + intent_facets_raw: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'regional enteritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:884', 'cipher:3534'] + planning_reasoning: ['Selected shortlisted candidates align with regional enteritis as a diagnosis-oriented study intent.', 'Included [P] Diarrhea including enteritis as a diagnosis candidate focused on Diarrhea.', 'Included Regional enteritis (MAP) as a diagnosis candidate focused on Regional enteritis.'] + recommendations: + ohdsi:884 | [P] Diarrhea including enteritis | Diarrhea is a common symptom associated with regional enteritis and this phenotype identifies its presence. + cipher:3534 | Regional enteritis (MAP) | This phenotype identifies patients with Regional enteritis based on a MAP algorithm probability cutoff of 0.45, aligning with the study intent. 
+ +CASE 49: Patients with primary localized osteoarthritis + intent_facets_raw: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'osteoarthritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['primary localized osteoarthritis'], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:3192', 'cipher:4399', 'cipher:3190', 'cipher:4029'] + planning_reasoning: ['Selected shortlisted candidates align with osteoarthritis as a diagnosis-oriented study intent.', 'Included Osteoarthrosis, localized, primary (MAP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Finger Osteoarthritis (MVP) as a diagnosis candidate focused on Osteoarthritis.', 'Included Osteoarthritis; localized (MAP) as a diagnosis candidate focused on Osteoarthritis localized.'] + recommendations: + cipher:3192 | Osteoarthrosis, localized, primary (MAP) | This phenotype directly aligns with the study intent of identifying patients with primary localized osteoarthritis using the MAP algorithm with a probability cutoff of 0.41. 
+ cipher:4399 | Finger Osteoarthritis (MVP) | This phenotype represents osteoarthritis specifically affecting the fingers, which can be relevant to the broader context of osteoarthritis and potentially linked to the study's focus on localized for + cipher:3190 | Osteoarthritis; localized (MAP) | This phenotype is also based on the MAP unsupervised clustering algorithm and focuses on the diagnosis of localized osteoarthritis, complementing the primary recommendation. + +CASE 50: Patients with aortic valve disease + intent_facets_raw: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'aortic valve disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1172', 'cipher:30301', 'cipher:31315'] + planning_reasoning: ['Selected shortlisted candidates align with aortic valve disease as a diagnosis-oriented study intent.', 'Included [P] Heart valve disorder 10 as a comorbidity covariate candidate focused on Heart valve disorder.', 'Included Nonrheumatic Aortic Valve Disorders (HDR UK) as a diagnosis candidate focused on Aortic Valve Disorders.', 'Included Valvular Disease - Elixhauser Primary Care (Metcalfe) as a comorbidity covariate candidate focused on Valvular Disease.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:1103.'] + recommendations: + ohdsi:1172 | [P] Heart valve disorder 10 | This 
phenotype directly addresses aortic valve disease as a comorbidity/covariate, aligning with the study intent. + cipher:30301 | Nonrheumatic Aortic Valve Disorders (HDR UK) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:31315 | Valvular Disease - Elixhauser Primary Care (Metcalfe) | Selected from the top reranked shortlisted candidates as a clinically aligned comorbidity covariate match. + +CASE 51: Patients with chronic periodontitis + intent_facets_raw: {'condition_or_topic': 'chronic periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'chronic periodontitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13494', 'cipher:3397', 'cipher:2371'] + planning_reasoning: ['Selected shortlisted candidates align with chronic periodontitis as a diagnosis-oriented study intent.', 'Included Chronic Periodontitis (Phecode) as a diagnosis candidate focused on Chronic Periodontitis.', 'Included Periodontitis (acute or chronic) (MAP) as a diagnosis candidate focused on Periodontitis.', 'Included Chronic periodontitis (MAP) as a diagnosis candidate focused on Chronic Periodontitis.'] + recommendations: + cipher:13494 | Chronic Periodontitis (Phecode) | This phenotype directly addresses chronic periodontitis based on ICD codes and was designed for phenome-wide association studies. 
+ cipher:3397 | Periodontitis (acute or chronic) (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + cipher:2371 | Chronic periodontitis (MAP) | This phenotype also identifies chronic periodontitis using the MAP algorithm with a specified probability threshold, providing an alternative diagnostic approach. + +CASE 52: Patients with hypertensive chronic kidney disease + intent_facets_raw: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'hypertensive chronic kidney disease', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:1191', 'ohdsi:964', 'cipher:31686', 'cipher:30635'] + planning_reasoning: ['Selected shortlisted candidates align with hypertensive chronic kidney disease as a diagnosis-oriented study intent.', 'Included [P] Chronic kidney disease or end stage renal disease 10 as a comorbidity covariate candidate focused on Chronic Kidney Disease.', 'Included [P] Chronic kidney disease as a comorbidity covariate candidate focused on Chronic Kidney Disease.', 'Included Chronic Kidney Disease (Dashtban) as a comorbidity covariate candidate focused on Chronic Kidney Disease.'] + recommendations: + ohdsi:1191 | [P] Chronic kidney disease or end stage renal disease 10 | This phenotype is a detailed representation of CKD and ESRD, suitable for assessing 
patient risk in the context of hypertensive patients. + ohdsi:964 | [P] Chronic kidney disease | This phenotype directly aligns with 'hypertensive chronic kidney disease' and represents a key comorbidity covariate for this patient population. + cipher:31686 | Chronic Kidney Disease (Dashtban) | The HDR UK phenotype provides a comprehensive assessment of CKD, relevant for studying cardiovascular disease and COVID-19 impact in hypertensive patients. + +CASE 53: Patients with cardiomyopathy + intent_facets_raw: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'cardiomyopathy', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'adult', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30192', 'cipher:31252', 'ohdsi:679', 'cipher:30174'] + planning_reasoning: ['Selected shortlisted candidates align with cardiomyopathy as a diagnosis-oriented study intent.', 'Included Other Cardiomyopathy (HDR UK) as a diagnosis candidate focused on Cardiomyopathy.', 'Included Cardiomyopathy (Knight) as a diagnosis candidate focused on Cardiomyopathy.', 'Included [P][R] Takotsubo cardiomyopathy as a diagnosis candidate focused on Takotsubo cardiomyopathy.'] + recommendations: + cipher:30192 | Other Cardiomyopathy (HDR UK) | This phenotype directly addresses the study intent of patients with cardiomyopathy. 
+ cipher:31252 | Cardiomyopathy (Knight) | This phenotype also identifies patients with cardiomyopathy and is a relevant consideration. + ohdsi:679 | [P][R] Takotsubo cardiomyopathy | This phenotype aligns with the study intent of identifying patients with Takotsubo cardiomyopathy, a specific form of cardiomyopathy. + +CASE 54: Patients with scleritis or episcleritis + intent_facets_raw: {'condition_or_topic': 'scleritis or episcleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'inflammatory eye disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'scleritis or episcleritis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'inflammatory eye disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:30069'] + planning_reasoning: ['Selected shortlisted candidates align with scleritis or episcleritis as a diagnosis-oriented study intent.', 'Included Scleritis and Episcleritis (HDR UK) as a diagnosis candidate focused on Scleritis and Episcleritis.'] + recommendations: + cipher:30069 | Scleritis and Episcleritis (HDR UK) | This phenotype directly addresses the study intent of identifying patients with scleritis or episcleritis based on HDR UK criteria. 
+ +CASE 55: Patients with a carbohydrate transport and metabolism disorder + intent_facets_raw: {'condition_or_topic': 'carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'carbohydrate transport and metabolism disorder', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:12820', 'cipher:12818', 'cipher:3256', 'cipher:17097'] + planning_reasoning: ['Selected shortlisted candidates align with carbohydrate transport and metabolism disorder as a diagnosis-oriented study intent.', 'Included Other Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on Carbohydrate Transport and Metabolism Disorders.', 'Included Disorders of Carbohydrate Transport and Metabolism (Phecode) as a comorbidity covariate candidate focused on Disorders of Carbohydrate Transport and Metabolism.', 'Included Other disorders of carbohydrate transport and metabolism (MAP) as a comorbidity covariate candidate focused on Carbohydrate Transport Metabolism.'] + recommendations: + cipher:12820 | Other Disorders of Carbohydrate Transport and Metabolism (Phecode) | This Phecode also relates to disorders of carbohydrate transport and metabolism, providing a complementary option for identifying patients with this condition. 
+ cipher:12818 | Disorders of Carbohydrate Transport and Metabolism (Phecode) | This phenotype represents a comorbidity/covariate based on the Phecode grouping, directly related to the study intent of carbohydrate transport and metabolism disorders. + cipher:3256 | Other disorders of carbohydrate transport and metabolism (MAP) | This phenotype utilizes the MAP algorithm to identify patients with a probability exceeding 0.6 of having carbohydrate transport and metabolism disorders, providing an alternative approach based on un + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + intent_facets_raw: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['hospital'], 'population_cues': ['inpatient']} + intent_facets_effective: {'condition_or_topic': 'acetaminophen', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'inpatient', 'population_cue': 'hospital', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['hospital'], 'population_cues': ['inpatient']} + planning_shortlist: ['ohdsi:1187'] + planning_reasoning: ['Selected shortlisted candidates align with acetaminophen as a medication based-oriented study intent.', 'Included [P] acetaminophen exposure 10 as a medication based candidate focused on acetaminophen exposure.'] + recommendations: + ohdsi:1187 | [P] acetaminophen exposure 10 | This phenotype directly reflects acetaminophen exposure with a 30-day persistence window, aligning with the study intent of patients with acetaminophen exposure in the hospital setting. 
+ +CASE 57: Patients diagnosed with dyschromia and vitiligo + intent_facets_raw: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'dyschromia and vitiligo', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:13900', 'ohdsi:471', 'cipher:2628'] + planning_reasoning: ['Selected shortlisted candidates align with dyschromia and vitiligo as a diagnosis-oriented study intent.', 'Included Dyschromia and Vitiligo (Phecode) as a diagnosis candidate focused on Dyschromia and Vitiligo.', 'Included [P][R] Vitiligo as a diagnosis candidate focused on Vitiligo.', 'Included Dyschromia and Vitiligo (MAP) as a diagnosis candidate focused on Dyschromia and Vitiligo.'] + recommendations: + cipher:13900 | Dyschromia and Vitiligo (Phecode) | This phenotype directly addresses the study intent of identifying patients with dyschromia and vitiligo based on ICD codes. + ohdsi:471 | [P][R] Vitiligo | This phenotype represents the diagnosis of Vitiligo, a key component of the study intent. + cipher:2628 | Dyschromia and Vitiligo (MAP) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + intent_facets_raw: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'acute hepatic injury', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'no pre-existing liver disease', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:735', 'ohdsi:294', 'ohdsi:293', 'cipher:18447'] + planning_reasoning: ['Selected shortlisted candidates align with acute hepatic injury as a diagnosis-oriented study intent.', 'Included [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure as a diagnosis candidate focused on Acute Liver Injury.', 'Included [P] Acute Hepatic Injury with no pre-existing liver disease as a diagnosis candidate focused on Acute Liver Injury.', 'Included [P] Acute Hepatic Injury or inpatient jaundice as a diagnosis candidate focused on Acute Liver Injury.'] + recommendations: + ohdsi:735 | [P] Acute Liver Injury indexed on diagnosis or symptoms with no chronic hepatic failure | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. 
+ ohdsi:294 | [P] Acute Hepatic Injury with no pre-existing liver disease | This phenotype directly addresses the study intent by identifying the earliest event of Acute Liver Injury in patients without pre-existing liver disease, excluding those with chronic hepatic failure. + ohdsi:293 | [P] Acute Hepatic Injury or inpatient jaundice | This phenotype captures acute liver injury, including jaundice, and excludes chronic liver disease, aligning with the study's focus on acute injury in patients without pre-existing liver disease. + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + intent_facets_raw: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['patient diagnosis'], 'care_setting_cues': ['acute care', 'chronic care'], 'population_cues': ['neurological patients']} + intent_facets_effective: {'condition_or_topic': 'nerve plexus lesions', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['patient diagnosis'], 'care_setting_cues': ['acute care', 'chronic care'], 'population_cues': ['neurological patients']} + planning_shortlist: ['cipher:13084', 'cipher:13085', 'cipher:14974', 'cipher:3108'] + planning_reasoning: ['Selected shortlisted candidates align with nerve plexus lesions as a diagnosis-oriented study intent.', 'Included Nerve Plexus Lesions (Phecode) as a diagnosis candidate focused on Nerve Plexus Lesions.', 'Included Nerve Root Lesions (Phecode) as a diagnosis candidate focused on Nerve Root Lesions.', 'Included Nerve plexus lesions 
(gwPheWAS) as a comorbidity covariate candidate focused on Nerve Plexus Lesions.'] + recommendations: + cipher:13084 | Nerve Plexus Lesions (Phecode) | This phenotype directly aligns with the study intent of defining patients with nerve plexus lesions using the Phecode system, which maps ICD codes to clinically relevant phenotypes. + cipher:13085 | Nerve Root Lesions (Phecode) | This phenotype is related to nerve plexus lesions and can be considered as a relevant phenotype based on the study intent. + cipher:14974 | Nerve plexus lesions (gwPheWAS) | This phenotype is based on the Million Veteran Program (MVP) phenome-wide GWAS and utilizes Phecodes, which are clinically relevant phenotypes defined by ICD codes. + +CASE 60: patients with a diagnosis of PRES + intent_facets_raw: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'PRES', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'unknown', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:223'] + planning_reasoning: ['Selected shortlisted candidates align with PRES as a diagnosis-oriented study intent.', 'Included [P] Posterior reversible encephalopathy syndrome PRES as a diagnosis candidate focused on Posterior reversible encephalopathy syndrome (PRES).'] + recommendations: + ohdsi:223 | [P] Posterior reversible encephalopathy syndrome PRES | The study intent is for patients with a diagnosis of PRES. 
This phenotype directly represents the diagnosis of PRES as defined in the OHDSI phenotype library. + +CASE 61: patients with chronic ulcerative colitis + intent_facets_raw: {'condition_or_topic': 'ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'ulcerative colitis', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'chronic', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['ohdsi:860', 'ohdsi:458', 'ohdsi:201', 'cipher:30724'] + planning_reasoning: ['Selected shortlisted candidates align with ulcerative colitis as a diagnosis-oriented study intent.', 'Included [P] Earliest event of Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included [P][R] Ulcerative colitis as a diagnosis candidate focused on Ulcerative colitis.', 'Included [P] Ulcerative colitis or complications as a diagnosis candidate focused on Ulcerative colitis.'] + recommendations: + ohdsi:860 | [P] Earliest event of Ulcerative colitis | This phenotype identifies the earliest diagnosis of Ulcerative colitis, aligning with the study intent of patients with chronic ulcerative colitis. + ohdsi:458 | [P][R] Ulcerative colitis | This phenotype represents all events of Ulcerative colitis, which is relevant to understanding the patient population with this condition. 
+ ohdsi:201 | [P] Ulcerative colitis or complications | This phenotype captures Ulcerative colitis and associated complications, providing a comprehensive view of the patient's condition. + +CASE 62: Veteran patients with developmental disorders that are pervasive + intent_facets_raw: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'developmental disorders', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'Veteran patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: ['cipher:17197', 'cipher:17193', 'cipher:18933', 'cipher:17141'] + planning_reasoning: ['Selected shortlisted candidates align with developmental disorders as a diagnosis-oriented study intent.', 'Included Develomental Delays and Disorders (VADC) as a comorbidity covariate candidate focused on Developmental Delays and Disorders.', 'Included Pervasive Developmental Disorders (VADC) as a comorbidity covariate candidate focused on Pervasive Developmental Disorders.', 'Included Mental Health (PERC) as a diagnosis candidate focused on Mental Health Disorders.'] + recommendations: + cipher:17197 | Develomental Delays and Disorders (VADC) | This phenotype aligns directly with the study intent of identifying developmental delays and disorders in veteran patients. 
+ cipher:17193 | Pervasive Developmental Disorders (VADC) | This phenotype also relates to pervasive developmental disorders, complementing the broader focus of the study. + cipher:18933 | Mental Health (PERC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + intent_facets_raw: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'patients with multiple diagnoses', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses'], 'care_setting_cues': ['inpatient', 'emergency department', 'any setting'], 'population_cues': ['patients with multiple diagnoses']} + intent_facets_effective: {'condition_or_topic': 'acute myocardial infarction', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient|ed|any', 'population_cue': 'patients with multiple diagnoses', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['multiple diagnoses'], 'care_setting_cues': ['inpatient', 'emergency department', 'any setting'], 'population_cues': ['patients with multiple diagnoses']} + planning_shortlist: ['ohdsi:510', 'ohdsi:1081', 'cipher:18982', 'cipher:31590'] + planning_reasoning: ['Selected shortlisted candidates align with acute myocardial infarction as a diagnosis-oriented study intent.', 'Included [P][R] Acute myocardial infarction as a diagnosis candidate focused on Acute myocardial infarction.', 'Included [P] FDA AESI Acute Myocardial Infarction or its complications as a diagnosis candidate focused on Acute Myocardial Infarction.', 'Included Acute Myocardial Infarction (VA CAUSAL Methods) as a 
diagnosis candidate focused on Acute Myocardial Infarction.'] + recommendations: + ohdsi:510 | [P][R] Acute myocardial infarction | This phenotype identifies patients with Acute myocardial infarction and aligns with the study intent of identifying patients with at least 2 recorded diagnoses. + ohdsi:1081 | [P] FDA AESI Acute Myocardial Infarction or its complications | This phenotype identifies patients with Acute Myocardial Infarction, a critical diagnosis in cardiology and directly addresses the study intent of identifying patients with at least 2 recorded diagnos + cipher:18982 | Acute Myocardial Infarction (VA CAUSAL Methods) | This phenotype identifies individuals with two or more prior diagnoses of Acute Myocardial Infarction based on ICD-10 codes, fulfilling the study intent of identifying patients with multiple diagnoses + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'Antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'Antiphospholipid syndrome', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['ohdsi:632', 'ohdsi:781'] + planning_reasoning: ['Selected shortlisted candidates align with Antiphospholipid syndrome as a diagnosis-oriented study intent.', 'Included [P][R] 
Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid syndrome.', 'Included [P] Antiphospholipid syndrome as a comorbidity covariate candidate focused on Antiphospholipid Syndrome.'] + recommendations: + ohdsi:632 | [P][R] Antiphospholipid syndrome | This phenotype represents all events of Antiphospholipid syndrome, aligning with the study intent of patients diagnosed with this condition. + ohdsi:781 | [P] Antiphospholipid syndrome | This phenotype represents the first occurrence of Antiphospholipid Syndrome, capturing the baseline presence of the condition as relevant to the study. + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + intent_facets_raw: {'condition_or_topic': 'ADRD or late-stage dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + intent_facets_effective: {'condition_or_topic': 'ADRD or late-stage dementia', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'older adults', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['older adults']} + planning_shortlist: ['ohdsi:651', 'ohdsi:864', 'ohdsi:927', 'cipher:31120'] + planning_reasoning: ['Selected shortlisted candidates align with ADRD or late-stage dementia as a diagnosis-oriented study intent.', 'Included [P][R] Dementia as a diagnosis candidate focused on Dementia.', 'Included [P] Earliest event of Dementia as a diagnosis candidate focused on Dementia.', 'Included [P] Dementia2 as a diagnosis candidate focused on Dementia.'] + recommendations: + 
ohdsi:651 | [P][R] Dementia | This phenotype represents Dementia, a condition primarily used for diagnosis, aligning with the study intent of older adults with a likely diagnosis of ADRD or late-stage dementia. + ohdsi:864 | [P] Earliest event of Dementia | This phenotype identifies the diagnosis of dementia in patients 18 years or older, relevant to the study's focus on older adults. + ohdsi:927 | [P] Dementia2 | This phenotype identifies the initial record of Dementia, primarily representing the diagnosis of dementia, which aligns with the study intent. + +CASE 66: patients who experienced a GI bleed adverse event + intent_facets_raw: {'condition_or_topic': 'GI bleed', 'clinical_topic_aliases': ['Gastrointestinal bleeding', 'Gastrointestinal hemorrhage'], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Adverse event'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients experiencing adverse events']} + intent_facets_effective: {'condition_or_topic': 'GI bleed', 'clinical_topic_aliases': ['gastrointestinal bleeding', 'gastrointestinal hemorrhage'], 'phenotype_role': 'outcome', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'not_specified', 'role_cues': ['Adverse event'], 'care_setting_cues': ['any clinical setting'], 'population_cues': ['patients experiencing adverse events']} + planning_shortlist: ['ohdsi:482', 'ohdsi:888', 'ohdsi:417', 'ohdsi:349'] + planning_reasoning: ['Selected shortlisted candidates align with GI bleed as a outcome-oriented study intent.', 'Included [P][R] Gastrointestinal hemorrhage as a outcome candidate focused on Gastrointestinal hemorrhage.', 'Included [P] Gastrointestinal bleeding as a outcome 
candidate focused on Gastrointestinal Bleeding.', 'Included [P] Acute gastrointestinal bleeding events as a outcome candidate focused on Gastrointestinal Bleeding.'] + recommendations: + ohdsi:482 | [P][R] Gastrointestinal hemorrhage | This phenotype directly represents the event of gastrointestinal hemorrhage, aligning with the study intent of patients who experienced a GI bleed adverse event. + ohdsi:888 | [P] Gastrointestinal bleeding | This phenotype identifies patients experiencing gastrointestinal bleeding, a related adverse event to the study intent, primarily in inpatient or ER settings. + ohdsi:417 | [P] Acute gastrointestinal bleeding events | This phenotype represents events of gastrointestinal bleed, an outcome measure, closely related to the study intent. + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + intent_facets_raw: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + intent_facets_effective: {'condition_or_topic': 'COVID-19', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'outpatient', 'population_cue': 'patients', 'validation_preference': 'not_specified', 'executability_preference': 'not_specified', 'geography_coding_preference': 'not_specified', 'role_cues': [], 'care_setting_cues': ['outpatient'], 'population_cues': ['patients']} + planning_shortlist: ['cipher:31308', 'ohdsi:678'] + planning_reasoning: ['Selected shortlisted candidates align with COVID-19 as a diagnosis-oriented study intent.', 'Included Confirmed COVID-19 Diagnoses (Knight) as a diagnosis candidate focused on Confirmed COVID-19.', 'Included [P][R] COVID-19 as a diagnosis candidate focused on 
COVID-19.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:47, ohdsi:59.'] + recommendations: + cipher:31308 | Confirmed COVID-19 Diagnoses (Knight) | This phenotype identifies patients diagnosed with COVID-19 based on SNOMED CT codes, which is relevant to the outpatient diagnosis study intent. + ohdsi:678 | [P][R] COVID-19 | This phenotype represents the diagnosis of COVID-19, aligning with the study intent of patients who received a COVID-19 diagnosis in the outpatient setting. + +CASE 68: veterans who experienced an abdominal aortic aneurysm + intent_facets_raw: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + intent_facets_effective: {'condition_or_topic': 'abdominal aortic aneurysm', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'veterans', 'validation_preference': 'not_specified', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'va', 'role_cues': [], 'care_setting_cues': [], 'population_cues': ['veterans']} + planning_shortlist: [] + planning_reasoning: ['Selected shortlisted candidates align with abdominal aortic aneurysm as a diagnosis-oriented study intent.', 'Shortlist replaced lower-quality candidates after rerank enforcement: ohdsi:1290, ohdsi:866.'] + recommendations: + +CASE 69: patients with COPD according to diagnostic codes in the EHR + intent_facets_raw: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 
'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + intent_facets_effective: {'condition_or_topic': 'COPD', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'us_omop', 'role_cues': [], 'care_setting_cues': [], 'population_cues': []} + planning_shortlist: ['cipher:29794', 'cipher:29756', 'cipher:31297', 'ohdsi:1192'] + planning_reasoning: ['Selected shortlisted candidates align with COPD as a diagnosis-oriented study intent.', 'Included Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) as a diagnosis candidate focused on COPD.', 'Included Chronic Obstructive Pulmonary Disease (HR UK) as a diagnosis candidate focused on Chronic Obstructive Pulmonary Disease (COPD).', 'Included Chronic Obstructive Pulmonary Disease (Knight) as a diagnosis candidate focused on Chronic Obstructive Pulmonary Disease (COPD).'] + recommendations: + cipher:29794 | Chronic Obstructive Pulmonary Disease, ICD, Read, and Med Codes (HDR UK) | This phenotype directly addresses the study intent of identifying patients with COPD based on ICD codes. + cipher:29756 | Chronic Obstructive Pulmonary Disease (HR UK) | This phenotype represents a diagnosis for Chronic Obstructive Pulmonary Disease (COPD), based on ICD10 codes and aligns with the study intent. + cipher:31297 | Chronic Obstructive Pulmonary Disease (Knight) | This phenotype identifies patients with Chronic Obstructive Pulmonary Disease based on ICD-10 and SNOMED CT codes, meeting the study's diagnostic criteria. 
+ +CASE 70: patients hospitalized at least once for heart failure + intent_facets_raw: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hospitalized patient'], 'care_setting_cues': ['Inpatient'], 'population_cues': ['Hospitalized patients']} + intent_facets_effective: {'condition_or_topic': 'heart failure', 'clinical_topic_aliases': [], 'phenotype_role': 'diagnosis', 'care_setting': 'inpatient', 'population_cue': 'patients hospitalized', 'validation_preference': 'required', 'executability_preference': 'prefer_native_ohdsi', 'geography_coding_preference': 'us_omop', 'role_cues': ['Hospitalized patient'], 'care_setting_cues': ['Inpatient'], 'population_cues': ['Hospitalized patients']} + planning_shortlist: ['ohdsi:934', 'ohdsi:1303', 'cipher:16152'] + planning_reasoning: ['Selected shortlisted candidates align with heart failure as a diagnosis-oriented study intent.', 'Included [P] Heart failure2 as a diagnosis candidate focused on Heart Failure.', 'Included [P] Acute Heart failure from legend as a diagnosis candidate focused on Heart Failure.', 'Included Heart Failure (BOS CSPCC) as a diagnosis candidate focused on Heart Failure.'] + recommendations: + ohdsi:934 | [P] Heart failure2 | This phenotype represents the initial diagnosis of Heart Failure, which aligns with the study intent of patients hospitalized at least once for heart failure. 
+ ohdsi:1303 | [P] Acute Heart failure from legend | This phenotype identifies the first recorded episode of heart failure in a patient, followed by at least one subsequent heart failure condition record, which is relevant to hospitalized patients with + cipher:16152 | Heart Failure (BOS CSPCC) | Selected from the top reranked shortlisted candidates as a clinically aligned diagnosis match. + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + intent_facets_raw: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication use'], 'care_setting_cues': ['any setting'], 'population_cues': ['patients with diabetes']} + intent_facets_effective: {'condition_or_topic': 'diabetes', 'clinical_topic_aliases': [], 'phenotype_role': 'medication_based', 'care_setting': 'any', 'population_cue': 'patients', 'validation_preference': 'preferred', 'executability_preference': 'allow_translation', 'geography_coding_preference': 'not_specified', 'role_cues': ['medication use'], 'care_setting_cues': ['any setting'], 'population_cues': ['patients with diabetes']} + planning_shortlist: ['cipher:31250', 'cipher:31195', 'cipher:30170', 'cipher:3760'] + planning_reasoning: ['Selected shortlisted candidates align with diabetes as a medication based-oriented study intent.', 'Included Diabetes and Diabates Medication (Knight) as a comorbidity covariate candidate focused on Diabetes.', 'Included Diabetes, Drug Code (Paige) as a comorbidity covariate candidate focused on Diabetes.', 'Included Diabetes, ICD, Read, and Med Codes (HDR UK) as a diagnosis candidate focused on Diabetes Mellitus.'] + recommendations: + cipher:31250 | Diabetes and Diabates Medication (Knight) | This HDR UK phenotype based on ICD10 codes 
and SNOMED CT codes aligns with the study intent of identifying patients with diabetes based on medication use. + cipher:31195 | Diabetes, Drug Code (Paige) | This HDR UK phenotype based on Read codes v2 is a diabetes phenotype focused on medication use, fitting the user's intent. + cipher:30170 | Diabetes, ICD, Read, and Med Codes (HDR UK) | This phenotype, based on ICD-10 codes and Med Codes, is suitable for diagnosis of diabetes which is aligned with the study intent. + +CASE 1: Patients with an implanted cardiac defibrillator + cipher:30773 | -5.3875 | [('topic_mismatch', 'Trifascicular Block')] + cipher:30192 | -5.3875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:16289 | -5.3875 | [('topic_mismatch', 'Bleeding')] + cipher:31291 | -12.3875 | [('topic_mismatch', 'Life Threatening Arrhythmias')] + cipher:30617 | -12.8675 | [('topic_mismatch', 'Coronary Heart Disease')] + ohdsi:1102 | -21.65 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + ohdsi:1314 | -21.67 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:875 | -21.69 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + +CASE 2: Patients diagnosed with fasciitis + cipher:15684 | 35.6125 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.71 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + cipher:29553 | -3.8275 | [('topic_mismatch', 'Sleep Apnea')] + cipher:30170 | -3.8875 | [('topic_mismatch', 'Diabetes Mellitus')] + +CASE 3: Patients with acute prostatitis + ohdsi:283 | 34.35 | [('topic_primary', 'Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Prostatitis"]}')] + cipher:13720 | 28.9425 | 
[('topic_primary', 'Acute Prostatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Prostatitis"]}')] + cipher:15499 | 15.1325 | [('topic_primary', 'Acute Prostatitis')] + cipher:18650 | 13.7554 | [('topic_primary', 'Chronic Prostatitis or Chronic Pelvic Pain Syndrome (MVP)')] + ohdsi:1301 | -5.21 | [('topic_mismatch', 'Urinary Tract Infection')] + ohdsi:410 | -5.25 | [('topic_mismatch', 'Urinary Tract Infection')] + cipher:30181 | -5.3875 | [('topic_mismatch', 'Non-Acute Cystitis')] + cipher:29775 | -5.3875 | [('topic_mismatch', 'Prostate Hyperplasia')] + +CASE 4: Patients who underwent esophagectomy + ohdsi:1097 | 32.1033 | [('topic_primary', 'Esophagectomy'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Esophagectomy"]}')] + ohdsi:1294 | 26.33 | [('topic_primary', 'Esophagectomy')] + ohdsi:870 | 26.31 | [('topic_primary', 'Esophagectomy')] + ohdsi:1309 | 22.35 | [('topic_primary', 'Esophagectomy')] + ohdsi:877 | -5.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:1106 | -9.25 | [('topic_mismatch', 'Surgery')] + +CASE 5: Patients diagnosed with peripheral neuritis + ohdsi:388 | 35.85 | [('topic_primary', 'Peripheral neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral neuritis"]}')] + ohdsi:389 | 20.08 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathy"]}')] + ohdsi:238 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + ohdsi:540 | 20.0 | [('topic_primary', 'Optic neuritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Optic neuritis"]}')] + cipher:30768 | 19.8825 | [('topic_primary', 'Peripheral Neuropathies'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Peripheral Neuropathies"]}')] + ohdsi:236 | 19.6233 | [('topic_primary', 'Peripheral Neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic Peripheral Neuropathy"]}')] + ohdsi:541 | 18.31 | [('topic_primary', 'Idiopathic peripheral neuropathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Idiopathic peripheral neuropathy"]}')] + ohdsi:623 | -3.75 | [('topic_mismatch', 'Motor neuropathy')] + +CASE 6: Patients with concomitant TNF-alpha inhibitor and IL-12/23 inhibitor exposure for at least 30 days + ohdsi:760 | 27.6671 | [('topic_primary', 'IL-23 Inhibitors')] + ohdsi:1042 | 27.6471 | [('topic_primary', 'IL-23 inhibitors')] + ohdsi:1040 | 27.6071 | [('topic_primary', 'TNF alpha inhibitors')] + ohdsi:1069 | 25.5 | [('topic_primary', 'TNF inhibitors'), ('topic_context', '{"context_conditions": ["Crohns disease"], "target_conditions": ["TNF inhibitors"]}')] + ohdsi:759 | 23.6871 | [('topic_primary', 'TNF-alpha Inhibitors, IL23 Inhibitors')] + ohdsi:1057 | 20.3414 | [('topic_primary', 'IL-23 inhibitors'), ('topic_context', '{"context_conditions": ["Plaque psoriasis", "Psoriasis vulgaris"], "target_conditions": ["IL23 inhibitors"]}')] + ohdsi:1066 | 19.9881 | [('topic_primary', 'Tumor Necrosis Factor alpha (TNFa) inhibitors')] + ohdsi:1068 | 19.75 | [('topic_primary', 'Tumor Necrosis Factor alpha (TNFa) inhibitors'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Tumor Necrosis Factor alpha (TNFa) inhibitors"]}')] + +CASE 7: Patients with allergic rhinitis + ohdsi:508 | 35.85 | [('topic_primary', 'Allergic rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic rhinitis"]}')] + ohdsi:367 | 35.81 | [('topic_primary', 'Allergic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:2081 | 35.6125 | [('topic_primary', 'Allergic 
Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic Rhinitis"]}')] + cipher:30258 | 30.1925 | [('topic_primary', 'Allergic and Chronic Rhinitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Allergic and chronic rhinitis diagnosis"]}')] + cipher:13411 | 28.1125 | [('topic_primary', 'Allergic Rhinitis')] + ohdsi:12 | 23.1867 | [('topic_primary', 'Rhinitis'), ('topic_context', '{"context_conditions": ["Common cold", "Sinusitis", "Respiratory Symptoms"], "target_conditions": ["Rhinitis"]}')] + ohdsi:370 | 16.25 | [('topic_primary', 'Allergic Disorder')] + ohdsi:369 | 8.79 | [('topic_primary', 'Allergic condition')] + +CASE 8: Patients with ischemic heart disease + ohdsi:654 | 34.25 | [('topic_primary', 'Ischemic heart disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic heart disease"]}')] + cipher:16261 | 34.1125 | [('topic_primary', 'Ischemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischemic Heart Disease"]}')] + ohdsi:532 | 27.25 | [('topic_primary', 'Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Disease"]}')] + cipher:29218 | 23.6925 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:29772 | 23.6325 | [('topic_primary', 'Coronary Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Coronary Heart Disease"]}')] + cipher:30610 | 23.6125 | [('topic_primary', 'Ischaemic Heart Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ischaemic Heart Disease"]}')] + cipher:30617 | 11.2125 | [('topic_primary', 'Coronary Heart Disease')] + cipher:31868 | 9.7792 | [('topic_primary', 'Chronic Ischaemic Heart Disease')] + +CASE 9: Pregnant patients with hemorrhage in early pregnancy or threatened labor + cipher:2798 | 28.6125 | 
[('topic_primary', 'Hemorrhage in early pregnancy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hemorrhage in early pregnancy"]}')] + cipher:2643 | 20.0096 | [('topic_primary', 'Early or threatened labor; hemorrhage in early pregnancy')] + cipher:13824 | 18.1125 | [('topic_primary', 'Early Labor Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Early Labor Hemorrhage"]}')] + cipher:13827 | 15.7792 | [('topic_primary', 'Hemorrhage in Early Pregnancy')] + cipher:15566 | 14.6125 | [('topic_primary', 'Pregnancy Hemorrhage'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pregnancy Hemorrhage"]}')] + cipher:17376 | 7.7792 | [('topic_primary', 'Hemorrhage')] + cipher:2796 | 5.1125 | [('topic_primary', 'Hemorrhage during pregnancy; childbirth and postpartum')] + ohdsi:1434 | 4.0167 | [('topic_primary', 'Pregnancy Loss')] + +CASE 10: Patients who underwent lung resection + ohdsi:1268 | 28.6233 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative Afib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:1308 | 24.5833 | [('topic_primary', 'Lung Resection'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation", "Postoperative AFib"], "target_conditions": ["Lung Resection"]}')] + ohdsi:869 | 22.77 | [('topic_primary', 'Lung Resection')] + ohdsi:1293 | 22.75 | [('topic_primary', 'Lung Resection')] + ohdsi:1289 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:865 | -9.25 | [('topic_mismatch', 'Surgery')] + ohdsi:877 | -9.25 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1106 | -9.25 | [('topic_mismatch', 'Surgery')] + +CASE 11: Patients with laryngitis + ohdsi:355 | 34.35 | [('topic_primary', 'Laryngitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Laryngitis"]}')] + ohdsi:327 | -5.19 | [('topic_mismatch', 'Pharyngitis')] + ohdsi:352 | -5.25 | [('topic_mismatch', 
'Inflamed Tonsils')] + cipher:29206 | -5.3075 | [('topic_mismatch', 'Peritonsillar Abscess')] + cipher:29553 | -5.3675 | [('topic_mismatch', 'Sleep Apnea')] + cipher:31166 | -5.3875 | [('topic_mismatch', 'Sore Throat')] + cipher:2559 | -5.3875 | [('topic_mismatch', 'Laryngeal Diseases')] + cipher:29494 | -5.3875 | [('topic_mismatch', 'Aphonia')] + +CASE 12: Patients with regional enteritis + cipher:3534 | 35.6525 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:501 | -3.75 | [('topic_mismatch', "Crohn's disease")] + cipher:30115 | -3.8675 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + cipher:29190 | -11.3875 | [('topic_mismatch', "Crohn's Disease")] + cipher:31254 | -11.3875 | [('topic_mismatch', 'Liver Disease')] + +CASE 13: Patients with renal sclerosis + cipher:13646 | 28.8625 | [('topic_primary', 'Nephritis Nephrosis Renal Sclerosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nephritis Nephrosis Renal Sclerosis"]}')] + cipher:13656 | 26.6125 | [('topic_primary', 'Renal Sclerosis')] + ohdsi:1003 | 18.5 | [('topic_primary', 'Renal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal cancer"]}')] + ohdsi:481 | 16.79 | [('topic_primary', 'Renal failure syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Renal failure syndrome"]}')] + cipher:31257 | 7.1125 | [('topic_primary', 'Renal disease')] + ohdsi:502 | -5.25 | [('topic_mismatch', 'Kidney Stone')] + cipher:30292 | -5.3675 | [('topic_mismatch', 'Glomerulonephritis')] + ohdsi:964 | -12.67 | [('topic_mismatch', 'Chronic Kidney Disease')] + +CASE 14: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), 
('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + ohdsi:1082 | -5.25 | [('topic_mismatch', 'Myocarditis Pericarditis')] + +CASE 15: Patients with a diagnosis of PRES + ohdsi:223 | 45.15 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)'), ('dynamic_clinical_alias_match', {'alias': 'posterior reversible encephalopathy', 'field': 'primary_clinical_topic', 'topic': 'Posterior reversible encephalopathy syndrome (PRES)'}), ('dynamic_clinical_alias_context', {'alias': 'posterior reversible encephalopathy', 'field': 'target_vs_context_conditions'})] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | [('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:229 | -3.75 | [('topic_mismatch', 'Progressive Multifocal Leukoencephalopathy (PML)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + +CASE 
16: Patients with anorexia nervosa + ohdsi:1340 | 34.29 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + cipher:17187 | 34.1125 | [('topic_primary', 'Anorexia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia Nervosa"]}')] + ohdsi:1339 | 18.5 | [('topic_primary', 'Bulimia Nervosa'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Bulimia Nervosa"]}')] + cipher:30163 | 5.9225 | [('topic_context', '{"context_conditions": [], "target_conditions": ["Anorexia and Bulimia Nervosa"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1341 | -5.23 | [('topic_mismatch', 'Eating Disorders')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + cipher:30717 | -5.2875 | [('topic_mismatch', 'Eating Disorders')] + cipher:31265 | -5.3075 | [('topic_mismatch', 'Eating Disorders')] + +CASE 17: Patients with dizziness, vertigo, or motion sickness + cipher:13215 | 18.8125 | [('topic_primary', 'Dizziness and Giddiness (Lightheadedness and Vertigo)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dizziness and Giddiness (Lightheadedness and Vertigo)"]}')] + ohdsi:893 | 16.29 | [('topic_primary', 'Vertigo')] + cipher:2623 | 14.7792 | [('topic_primary', 'Dizziness and giddiness (Light-headedness and vertigo)')] + ohdsi:244 | 12.35 | [('topic_primary', 'Dizziness')] + cipher:15084 | 6.7792 | [('topic_primary', 'Dizziness and giddiness')] + cipher:4387 | -3.8675 | [('topic_mismatch', 'Vestibular Disorders')] + ohdsi:891 | -7.69 | [('topic_mismatch', 'Nausea')] + ohdsi:449 | -7.75 | [('topic_mismatch', 'Nausea')] + +CASE 18: Patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + ohdsi:670 | -5.21 | [('topic_mismatch', 'Temporal 
arteritis')] + ohdsi:1075 | -5.23 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + ohdsi:605 | -12.25 | [('topic_mismatch', 'Muscle pain')] + cipher:30630 | -12.8075 | [('topic_mismatch', 'Rheumatoid Arthritis')] + cipher:31127 | -12.8875 | [('topic_mismatch', 'Fibromyalgia Pain')] + cipher:30572 | -12.8875 | [('topic_mismatch', 'Rheumatoid Arthritis')] + +CASE 19: Patients with adverse effects from therapeutic corticosteroid use + cipher:2064 | 13.8925 | [('topic_primary', 'Adrenal Cortical Steroids Adverse Effects'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Adrenal Cortical Steroids Adverse Effects"]}')] + cipher:14303 | 10.3392 | [('topic_primary', 'Adrenal Steroid Adverse Effects')] + cipher:17565 | 9.5192 | [('topic_primary', 'Adverse Effects of Adrenal Steroids')] + cipher:2821 | 8.4658 | [('topic_primary', 'Hormones and Synthetic Substitutes Adverse Effects')] + cipher:18443 | 4.8375 | [('topic_primary', 'Severe Cutaneous Adverse Reaction (SCAR)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Severe Cutaneous Adverse Reaction"]}')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + cipher:18444 | -5.3875 | [('topic_mismatch', 'Drug Rash with Eosinophilia and Systemic Symptoms (DRESS)')] + cipher:30650 | -12.8875 | [('topic_mismatch', 'Smoking Status')] + +CASE 20: Patients with low blood pressure + cipher:13390 | 35.6125 | [('topic_primary', 'Hypotension'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypotension"]}')] + ohdsi:339 | 21.33 | [('topic_primary', 'Hypotension')] + ohdsi:890 | 21.31 | [('topic_primary', 'Hypotension')] + ohdsi:526 | 17.29 | [('topic_primary', 'Orthostatic hypotension')] + ohdsi:997 | 9.35 | [('topic_primary', 'Hypotension')] + ohdsi:954 | -3.73 | [('topic_mismatch', 'Syncope')] + ohdsi:1075 | -3.75 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:646 | -3.75 | [('topic_mismatch', 'Bradycardia')] + 
+CASE 21: Patients with encephalopathy + ohdsi:194 | 34.31 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:331 | 32.29 | [('topic_primary', 'Encephalopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Encephalopathy"]}')] + ohdsi:223 | 25.7667 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)'), ('topic_context', '{"context_conditions": ["Eclampsia", "Hypertensive encephalopathy"], "target_conditions": ["Posterior reversible encepha... [truncated 19 chars]')] + ohdsi:936 | -2.3929 | [('topic_context', '{"context_conditions": ["Hepatic necrosis", "Hepatic coma", "Hepatic encephalopathy", "Liver failure", "Liver injury"], ... [truncated 41 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:544 | -5.17 | [('topic_mismatch', 'Encephalitis')] + ohdsi:1333 | -5.23 | [('topic_mismatch', 'Advanced Liver Disease')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + cipher:18434 | -5.3875 | [('topic_mismatch', 'Progressive Multifocal Leukoencephalopathy (PML)')] + +CASE 22: Patients with birdshot chorioretinitis + ohdsi:1223 | 8.0167 | [('topic_context', '{"context_conditions": ["Uveitis"], "target_conditions": ["Birdshot chorioretinitis"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1226 | -3.73 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.75 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:755 | -3.75 | [('topic_mismatch', 'Uveitis')] + cipher:30185 | -3.8875 | [('topic_mismatch', 'Posterior Uveitis')] + cipher:13118 | -3.8875 | [('topic_mismatch', 'Chorioretinal Inflammations Scars')] + cipher:2341 | -3.8875 | [('topic_mismatch', 'Chorioretinal inflammations, scars, and other disorders of choroid')] + ohdsi:1225 | -10.69 | [('topic_mismatch', 'Uveitis')] + +CASE 23: Older adults with macular degeneration + cipher:30295 | 34.1725 | [('topic_primary', 
'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3006 | 34.1325 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:2505 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:16256 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3005 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:14995 | 34.1125 | [('topic_primary', 'Macular Degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration"]}')] + cipher:3007 | 30.6125 | [('topic_primary', 'Macular Degeneration, Wet'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Macular Degeneration, Wet"]}')] + ohdsi:536 | 29.1 | [('topic_primary', 'Age related macular degeneration'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Age related macular degeneration"]}')] + +CASE 24: Patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), 
('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:497 | 16.5 | [('topic_primary', 'Autoimmune hepatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hepatitis"]}')] + ohdsi:729 | 16.5 | [('topic_primary', 'Autoimmune Hepatitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hepatitis"]}')] + cipher:18439 | 16.3825 | [('topic_primary', 'Aplastic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aplastic Anemia"]}')] + cipher:2178 | 14.7792 | [('topic_primary', 'Autoimmune hemolytic anemias (MAP)')] + +CASE 25: Patients with MSI-low rectal adenocarcinoma + ohdsi:823 | 26.29 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:820 | 26.25 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:836 | 26.25 | [('topic_primary', 'colorectal cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["colorectal cancer"]}')] + ohdsi:822 | 26.25 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:819 | 20.5833 | [('topic_primary', 'Colorectal Cancer'), ('topic_context', '{"context_conditions": ["MSI-L", "MSI-indeterminate", "MSS", "pMMR"], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:831 | 19.5833 | [('topic_primary', 'Colorectal Cancer Treatment'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Colorectal Cancer"]}')] + ohdsi:812 | 18.85 | [('topic_primary', 'Colorectal Cancer')] + ohdsi:843 | 18.83 | [('topic_primary', 'colorectal cancer')] + +CASE 26: Patients with blistering skin lesions + ohdsi:652 | 13.875 | [('topic_primary', 'Vasculitis of the skin'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Vasculitis of the skin"]}')] + ohdsi:376 | 6.6433 | [('topic_primary', 'Bleeding Skin')] + ohdsi:1168 | 5.54 | [('topic_primary', 'Skin Ulcer'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Skin Ulcer"]}')] + ohdsi:948 | -3.73 | [('topic_mismatch', 'Rash')] + ohdsi:461 | -3.75 | [('topic_mismatch', 'Erythema multiforme')] + ohdsi:462 | -3.75 | [('topic_mismatch', 'Lichen planus')] + cipher:18444 | -3.8875 | [('topic_mismatch', 'Drug Rash with Eosinophilia and Systemic Symptoms (DRESS)')] + cipher:4016 | -3.8875 | [('topic_mismatch', 'Desquamative Rash')] + +CASE 27: Patients with stomatitis or mucositis + cipher:3657 | 27.7375 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 28: Patients with neurofibromatosis type 1 + ohdsi:697 | 35.85 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:304 | 35.83 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:305 | 35.75 | [('topic_primary', 'Neurofibromatosis type 1'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis type 1"]}')] + ohdsi:696 | 20.79 | 
[('topic_primary', 'Neurofibromatosis type 2')] + ohdsi:698 | 20.06 | [('topic_primary', 'Neurofibromatosis syndrome'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Neurofibromatosis syndrome"]}')] + ohdsi:308 | 15.6867 | [('topic_primary', 'Neurofibromatosis'), ('topic_context', '{"context_conditions": ["MRI of Brain", "Ophthalmology Visits"], "target_conditions": ["Neurofibromatosis"]}')] + ohdsi:306 | 13.85 | [('topic_primary', 'Optic Pathway Glioma and Neurofibromatosis')] + cipher:3115 | 12.6125 | [('topic_primary', 'Neurofibromatosis')] + +CASE 29: Patients with keloid scars + cipher:15610 | 8.6125 | [('topic_primary', 'Keloid Scar')] + ohdsi:468 | -3.75 | [('topic_mismatch', 'Pityriasis rubra pilaris')] + cipher:18443 | -10.8875 | [('topic_mismatch', 'Severe Cutaneous Adverse Reaction (SCAR)')] + cipher:30650 | -11.3875 | [('topic_mismatch', 'Smoking Status')] + ohdsi:1168 | -14.67 | [('topic_mismatch', 'Skin Ulcer')] + ohdsi:1215 | -14.69 | [('topic_mismatch', 'Cancer')] + ohdsi:877 | -18.75 | [('topic_mismatch', 'Postoperative Cardiac Complications')] + ohdsi:1102 | -20.21 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 30: Patients with acetaminophen exposure + ohdsi:1187 | 32.85 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1158 | 20.79 | [('topic_primary', 'Aspirin Exposure')] + cipher:18919 | -11.8875 | [('topic_mismatch', 'Serious Adverse Events')] + cipher:31254 | -15.3875 | [('topic_mismatch', 'Liver Disease')] + cipher:30616 | -15.3875 | [('topic_mismatch', 'Substance Misuse')] + ohdsi:1423 | -17.73 | [('topic_mismatch', 'Acute Intoxication')] + ohdsi:735 | -17.75 | [('topic_mismatch', 'Acute Liver Injury')] + ohdsi:1425 | -17.75 | [('topic_mismatch', 'Alcohol Intoxication')] + +CASE 31: Patients exposed to rifamycin antibiotics + ohdsi:1207 | 24.0833 | [('topic_primary', 'Antibiotics - Monobactams'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Antibiotics - 
Monobactams", "Aztreonam"]}')] + ohdsi:1203 | 20.75 | [('topic_primary', 'Antibiotics Cephalosporins')] + ohdsi:1211 | 0.85 | [('topic_mismatch', 'Rifamycins')] + ohdsi:1213 | 0.83 | [('topic_mismatch', 'Streptogramins')] + ohdsi:1206 | 0.81 | [('topic_mismatch', 'Macrolide Drug Exposure')] + ohdsi:1202 | 0.77 | [('topic_mismatch', 'Carbapenems')] + ohdsi:1210 | 0.75 | [('topic_primary', 'Antibiotics Persistence')] + ohdsi:1208 | 0.75 | [('topic_mismatch', 'Oxazolidinones')] + +CASE 32: Patients with a joint or ligament sprain + cipher:14236 | 30.6525 | [('topic_primary', 'Joint Ligament Sprain'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Joint Ligament Sprain"]}')] + ohdsi:363 | 18.56 | [('topic_primary', 'Joint stiffness'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Joint stiffness"]}')] + cipher:15814 | 16.4658 | [('topic_primary', 'Joint Ligament Sprain')] + ohdsi:452 | 7.75 | [('topic_primary', 'Joint pain')] + cipher:2944 | 7.2125 | [('topic_primary', 'Ligament sprain')] + cipher:18945 | -5.3875 | [('topic_mismatch', 'Fractures and Sprains')] + cipher:29559 | -9.3075 | [('topic_mismatch', 'Musculoskeletal Pain and Injury')] + cipher:3651 | -11.3875 | [('topic_mismatch', 'Sprains and strains')] + +CASE 33: Pregnant patients with miscarriage or stillbirth + cipher:3056 | 31.6125 | [('topic_primary', 'Miscarriage; Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:15565 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + cipher:13818 | 31.6125 | [('topic_primary', 'Miscarriage, Stillbirth'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Miscarriage", "Stillbirth"]}')] + ohdsi:627 | 21.33 | [('topic_primary', 'Miscarriage'), ('topic_context', '{"context_conditions": [], "target_conditions": 
["Miscarriage"]}')] + ohdsi:1432 | 16.31 | [('topic_primary', 'Stillbirth')] + ohdsi:606 | 16.25 | [('topic_primary', 'Stillbirth')] + ohdsi:1434 | -7.65 | [('topic_mismatch', 'Pregnancy Loss')] + ohdsi:1431 | -7.71 | [('topic_mismatch', 'Ectopic Pregnancy')] + +CASE 34: Patients with arterial embolism or thrombosis of a lower extremity artery + cipher:13354 | 19.2375 | [('topic_primary', 'Arterial Embolism and Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism and Thrombosis"]}')] + cipher:31817 | 15.6325 | [('topic_primary', 'Embolism or Thrombosis')] + cipher:31293 | 13.6125 | [('topic_primary', 'Arterial Embolism and Thrombosis')] + cipher:31819 | 12.6925 | [('topic_primary', 'Arterial Embolism, Upper Extremity'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Arterial Embolism"]}')] + cipher:31818 | 11.3625 | [('topic_primary', 'Iliac Artery Embolism/Thrombosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Iliac Artery Embolism", "Iliac Thrombosis"]}')] + ohdsi:1090 | 6.25 | [('topic_primary', 'Pulmonary Embolism'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Pulmonary Embolism"]}')] + cipher:31820 | -12.7875 | [('topic_mismatch', 'Lower Extremity Vascular Disease')] + ohdsi:1291 | -21.71 | [('topic_mismatch', 'Bypass Surgery')] + +CASE 35: Patients with a urinary tract infection who are new users of cephalosporins + cipher:31223 | 35.6125 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:1186 | 31.77 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Urinary Tract Infection"]}')] + ohdsi:861 | 22.5 | [('topic_primary', 'Urinary Tract Infection'), ('topic_context', '{"context_conditions": ["Pyuria", "Bacteriuria", "Cystitis"], "target_conditions": ["Urinary Tract 
Infection"]}')] + ohdsi:1061 | -7.65 | [('topic_mismatch', 'Cephalosporin Exposure')] + ohdsi:1044 | -7.67 | [('topic_mismatch', 'Cephalosporin Use')] + ohdsi:1060 | -7.69 | [('topic_mismatch', 'Fluoroquinolone Use')] + ohdsi:1064 | -7.75 | [('topic_mismatch', 'Cephalosporin Exposure')] + ohdsi:1062 | -7.75 | [('topic_mismatch', 'Trimethoprim systemetic')] + +CASE 36: Patients hospitalized with preinfarction syndrome + ohdsi:939 | -1.4357 | [('topic_context', '{"context_conditions": ["Preinfarction Syndrome", "Emergency Room Visit", "Inpatient Visit"], "target_conditions": ["Hos... [truncated 15 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1081 | -1.73 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30748 | -1.8875 | [('topic_mismatch', 'Myocardial Infarction')] + cipher:29772 | -5.3875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:30101 | -5.3875 | [('topic_mismatch', 'Myocardial Infarction')] + ohdsi:260 | -8.75 | [('topic_mismatch', 'ST elevation myocardial infarction')] + cipher:30617 | -12.8275 | [('topic_mismatch', 'Coronary Heart Disease')] + ohdsi:1159 | -16.17 | [('topic_mismatch', 'Angina')] + +CASE 37: Patients with a personal history of blood or blood-forming organ disease + cipher:3412 | 5.0125 | [('topic_primary', 'Personal history of diseases of blood and blood-forming organs')] + ohdsi:738 | -3.75 | [('topic_mismatch', 'Autoimmune hemolytic anemia')] + cipher:18428 | -3.8475 | [('topic_mismatch', 'Pancytopenia')] + cipher:30246 | -3.8675 | [('topic_mismatch', 'Aplastic Anaemias')] + cipher:30138 | -3.8875 | [('topic_mismatch', 'Hyposplenism')] + cipher:29220 | -3.8875 | [('topic_mismatch', 'Anemias, Other')] + cipher:30287 | -3.8875 | [('topic_mismatch', 'Myelodysplastic Syndromes')] + cipher:30672 | -3.8875 | [('topic_mismatch', 'Thalassaemia Trait')] + +CASE 38: Patients with benign pancreatic conditions + cipher:16955 | 20.6925 | [('topic_primary', 'Pancreatic Conditions')] + 
cipher:16954 | 20.6725 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16947 | 20.6125 | [('topic_primary', 'Pancreatic Conditions')] + cipher:16952 | 19.2375 | [('topic_primary', 'Pancreatic Cancer'), ('topic_context', '{"context_conditions": ["PSC", "IBD"], "target_conditions": ["Pancreatic Cancer"]}')] + cipher:16953 | 8.6125 | [('topic_primary', 'Pancreatic Inflammation')] + ohdsi:496 | -3.75 | [('topic_mismatch', 'Abdominal Pain')] + cipher:30238 | -3.8675 | [('topic_mismatch', 'Pancreatitis')] + cipher:30223 | -3.8875 | [('topic_mismatch', 'Benign Neoplasm of Stomach and Duodenum')] + +CASE 39: Patients with primary localized osteoarthritis + cipher:3192 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9025 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] + cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1725 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.77 | [('topic_primary', 'Osteoarthritis')] + +CASE 40: New users of dihydropyridine calcium channel blockers + ohdsi:1047 | 44.33 | 
[('topic_primary', 'dihydropyridine calcium channel blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["dihydropyridine calcium channel blockers"]}')] + ohdsi:1048 | 36.85 | [('topic_primary', 'dihydropyridine calcium channel blockers')] + ohdsi:1036 | 23.31 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1049 | 22.625 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Hypertension", "Essential Hypertension"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1052 | 22.54 | [('topic_primary', 'Beta Blockers'), ('topic_context', '{"context_conditions": ["Acute Myocardial Infarction"], "target_conditions": ["Beta Blockers"]}')] + ohdsi:1046 | 4.75 | [('topic_mismatch', 'Thiazide diuretics')] + cipher:30152 | -9.3875 | [('topic_mismatch', 'Hypertension')] + cipher:30608 | -11.8675 | [('topic_mismatch', 'Cardiovascular Risk Score')] + +CASE 41: Veteran patients with renal sclerosis + cipher:17322 | 21.7125 | [('topic_primary', 'Renal Sclerosis')] + cipher:18902 | 9.6125 | [('topic_primary', 'Renal Failure')] + cipher:31257 | 8.6125 | [('topic_primary', 'Renal disease')] + cipher:30611 | 6.6125 | [('topic_primary', 'End-Stage Renal Disease')] + cipher:16003 | -3.8875 | [('topic_mismatch', 'Chronic Kidney Disease')] + cipher:4282 | -10.3875 | [('topic_mismatch', 'Chronic Kidney Disease')] + ohdsi:964 | -11.19 | [('topic_mismatch', 'Chronic Kidney Disease')] + cipher:31686 | -11.3475 | [('topic_mismatch', 'Chronic Kidney Disease')] + +CASE 42: Veteran patients with polymyalgia rheumatica + cipher:30277 | 34.2125 | [('topic_primary', 'Polymyalgia Rheumatica'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Polymyalgia Rheumatica"]}')] + cipher:17453 | 20.1725 | [('topic_primary', 'Polymyalgia Rheumatica')] + cipher:3460 | 19.1125 | [('topic_primary', 'Polymyalgia Rheumatica')] + ohdsi:670 | -5.21 | 
[('topic_mismatch', 'Temporal arteritis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:363 | -5.25 | [('topic_mismatch', 'Joint stiffness')] + cipher:30630 | -12.8675 | [('topic_mismatch', 'Rheumatoid Arthritis')] + cipher:31127 | -12.8875 | [('topic_mismatch', 'Fibromyalgia Pain')] + +CASE 43: Veteran patients with autoimmune hemolytic anemia + cipher:18441 | 35.6925 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:1018 | 35.165 | [('topic_primary', 'Autoimmune Hemolytic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Warm Autoimmune Hemolytic Anemia"]}')] + ohdsi:738 | 34.85 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + ohdsi:728 | 27.81 | [('topic_primary', 'Autoimmune hemolytic anemia'), ('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}')] + cipher:17112 | 18.6325 | [('topic_primary', 'Autoimmune Hemolytic Anemias')] + cipher:18439 | 16.3625 | [('topic_primary', 'Aplastic Anemia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Aplastic Anemia"]}')] + cipher:2178 | 14.7792 | [('topic_primary', 'Autoimmune hemolytic anemias (MAP)')] + cipher:17113 | 6.9458 | [('topic_primary', 'Hemolytic Anemias')] + +CASE 44: Veteran patients with cardiac complications + ohdsi:1081 | -3.75 | [('topic_mismatch', 'Acute Myocardial Infarction')] + cipher:30192 | -3.8875 | [('topic_mismatch', 'Cardiomyopathy')] + cipher:29218 | -3.8875 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16294 | -9.8875 | [('topic_mismatch', 'Cardiovascular Disease Mortality')] + cipher:16189 | -9.8875 | [('topic_mismatch', 'COVID-19 Severity')] + cipher:16278 | -11.3275 | [('topic_mismatch', 'VA Administrative 
Data')] + cipher:30617 | -11.3675 | [('topic_mismatch', 'Coronary Heart Disease')] + cipher:16275 | -11.3875 | [('topic_mismatch', 'Chronic Pulmonary Disease')] + +CASE 45: Patients diagnosed with fasciitis + cipher:15684 | 35.6125 | [('topic_primary', 'Fasciitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Fasciitis"]}')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:454 | -3.71 | [('topic_mismatch', 'Dermatomyositis')] + ohdsi:479 | -3.75 | [('topic_mismatch', 'Chilblains')] + ohdsi:363 | -3.75 | [('topic_mismatch', 'Joint stiffness')] + cipher:30159 | -3.7875 | [('topic_mismatch', 'Enthesopathies and Synovial Disorders')] + cipher:29553 | -3.8275 | [('topic_mismatch', 'Sleep Apnea')] + cipher:30170 | -3.8875 | [('topic_mismatch', 'Diabetes Mellitus')] + +CASE 46: Patients with stomatitis or mucositis + cipher:3657 | 27.7375 | [('topic_primary', 'Stomatitis and mucositis (ulcerative)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Stomatitis and mucositis (ulcerative)"]}')] + cipher:17298 | 15.8792 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:15333 | 15.8392 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:13516 | 15.3592 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:13515 | 15.3192 | [('topic_primary', 'Stomatitis and Mucositis')] + cipher:3656 | 15.2792 | [('topic_primary', 'Stomatitis and mucositis')] + cipher:30086 | -3.8875 | [('topic_mismatch', 'Oropharyngeal Malignancy')] + cipher:17544 | -10.8875 | [('topic_mismatch', 'Open Wound')] + +CASE 47: Patients with Barretts esophagus + cipher:2187 | 35.6325 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:15342 | 35.6125 | [('topic_primary', "Barrett's esophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s esophagus"]}')] + cipher:30228 | 19.9625 | [('topic_primary', 
"Barrett's Oesophagus"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Barrett\'s Oesophagus"]}')] + ohdsi:447 | -3.69 | [('topic_mismatch', 'Esophagitis')] + ohdsi:525 | -3.71 | [('topic_mismatch', 'Gastroesophageal Reflux Disease')] + ohdsi:446 | -3.75 | [('topic_mismatch', 'Eosinophilic esophagitis')] + ohdsi:500 | -3.75 | [('topic_mismatch', 'Gastritis')] + cipher:30087 | -3.8875 | [('topic_mismatch', 'Primary Malignancy, Oesophageal')] + +CASE 48: Patients with regional enteritis + cipher:3534 | 35.6525 | [('topic_primary', 'Regional enteritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Regional enteritis"]}')] + ohdsi:884 | -3.75 | [('topic_mismatch', 'Diarrhea')] + ohdsi:501 | -3.75 | [('topic_mismatch', "Crohn's disease")] + cipher:30115 | -3.8675 | [('topic_mismatch', "Crohn's Disease")] + cipher:30160 | -3.8875 | [('topic_mismatch', 'Enteropathic Arthropathy')] + ohdsi:330 | -7.75 | [('topic_mismatch', 'Abdominal bloating')] + cipher:29190 | -11.3875 | [('topic_mismatch', "Crohn's Disease")] + cipher:31254 | -11.3875 | [('topic_mismatch', 'Liver Disease')] + +CASE 49: Patients with primary localized osteoarthritis + cipher:3192 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4399 | 34.1125 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis"]}')] + cipher:4029 | 32.8625 | [('topic_primary', 'Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Thumb Osteoarthritis"]}')] + cipher:3190 | 28.9025 | [('topic_primary', 'Osteoarthritis localized'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis localized"]}')] + cipher:16011 | 28.8625 | [('topic_primary', 'Knee Osteoarthritis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Knee Osteoarthritis"]}')] 
+ cipher:15900 | 28.1125 | [('topic_primary', 'Hip Osteoarthritis'), ('topic_context', '{"context_conditions": ["Degenerative Joint Disease"], "target_conditions": ["Hip Osteoarthritis"]}')] + cipher:30133 | 27.1725 | [('topic_primary', 'Osteoarthritis (Excl Spine)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Osteoarthritis (Excl Spine)"]}')] + ohdsi:396 | 26.77 | [('topic_primary', 'Osteoarthritis')] + +CASE 50: Patients with aortic valve disease + cipher:30301 | 24.6958 | [('topic_primary', 'Aortic Valve Disorders'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nonrheumatic aortic valve disorders"]}')] + cipher:31315 | 5.9458 | [('topic_primary', 'Valvular Disease')] + ohdsi:1172 | 0.85 | [('topic_primary', 'Heart valve disorder')] + ohdsi:1103 | -1.0233 | [('topic_primary', 'Cardiac Valve Surgery'), ('topic_context', '{"context_conditions": ["Atrial Fibrillation"], "target_conditions": ["Cardiac Valve Surgery"]}')] + ohdsi:876 | -4.23 | [('topic_primary', 'Cardiac Valve Surgery')] + ohdsi:1315 | -4.25 | [('topic_primary', 'Cardiac Valve Surgery')] + ohdsi:1300 | -4.25 | [('topic_primary', 'Cardiac Valve Surgery')] + ohdsi:1102 | -20.17 | [('topic_mismatch', 'Coronary Artery Bypass Graft Surgery')] + +CASE 51: Patients with chronic periodontitis + cipher:2371 | 28.1325 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13494 | 28.1125 | [('topic_primary', 'Chronic Periodontitis')] + cipher:3397 | 25.1925 | [('topic_primary', 'Periodontitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Periodontitis"]}')] + cipher:15317 | 20.6125 | [('topic_primary', 'Chronic Periodontitis')] + cipher:13492 | 20.1525 | [('topic_primary', 'Periodontitis')] + cipher:29206 | -3.8875 | [('topic_mismatch', 'Peritonsillar Abscess')] + cipher:16074 | -10.8875 | [('topic_mismatch', 'Dental Caries Risk')] + cipher:16936 | -11.2875 | [('topic_mismatch', 'Edentulism')] + +CASE 52: Patients with hypertensive 
chronic kidney disease + ohdsi:1191 | 19.0 | [('topic_primary', 'Chronic Kidney Disease'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Chronic Kidney Disease"]}')] + ohdsi:964 | 16.85 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31686 | 16.6125 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:30635 | 16.6125 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31287 | 14.1325 | [('topic_primary', 'Chronic Kidney Disease')] + cipher:31697 | 7.8725 | [('topic_primary', 'Hypertensive Heart and Renal Disease')] + cipher:30152 | -3.8875 | [('topic_mismatch', 'Hypertension')] + cipher:29211 | -11.3475 | [('topic_mismatch', 'Hypertension End Organ Damage')] + +CASE 53: Patients with cardiomyopathy + cipher:30192 | 32.9625 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Other Cardiomyopathy"]}')] + cipher:31252 | 32.5058 | [('topic_primary', 'Cardiomyopathy'), ('topic_context', '{"context_conditions": ["Heart Failure"], "target_conditions": ["Cardiomyopathy"]}')] + ohdsi:679 | 29.08 | [('topic_primary', 'Takotsubo cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Takotsubo cardiomyopathy"]}')] + cipher:30174 | 28.9025 | [('topic_primary', 'Dilated Cardiomyopathy'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dilated Cardiomyopathy"]}')] + cipher:30155 | 27.5292 | [('topic_primary', 'Hypertrophic Cardiomyopathy (HCM)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Hypertrophic Cardiomyopathy"]}')] + cipher:31636 | 19.1325 | [('topic_primary', 'Cardiomyopathy')] + cipher:31587 | 19.1125 | [('topic_primary', 'Cardiomyopathy')] + ohdsi:1082 | -5.25 | [('topic_mismatch', 'Myocarditis Pericarditis')] + +CASE 54: Patients with scleritis or episcleritis + cipher:30069 | 25.1725 | [('topic_primary', 'Scleritis and Episcleritis'), ('topic_context', '{"context_conditions": [], 
"target_conditions": ["Scleritis and Episcleritis"]}')] + ohdsi:1226 | -3.71 | [('topic_mismatch', 'Uveitis')] + ohdsi:1229 | -3.73 | [('topic_mismatch', "Behcet's Uveitis")] + ohdsi:1223 | -3.75 | [('topic_mismatch', 'Uveitis')] + ohdsi:620 | -3.75 | [('topic_mismatch', 'Uveitis')] + ohdsi:353 | -3.75 | [('topic_mismatch', 'Conjunctivitis')] + cipher:30247 | -3.8875 | [('topic_mismatch', 'Uveitis')] + cipher:15886 | -3.8875 | [('topic_mismatch', 'Anterior Uveitis')] + +CASE 55: Patients with a carbohydrate transport and metabolism disorder + cipher:12820 | 15.8125 | [('topic_primary', 'Carbohydrate Transport and Metabolism Disorders')] + cipher:12818 | 14.8058 | [('topic_primary', 'Disorders of Carbohydrate Transport and Metabolism')] + cipher:3256 | 14.3125 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:17097 | 14.2525 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:14837 | 14.2125 | [('topic_primary', 'Carbohydrate Transport Metabolism')] + cipher:2597 | 9.6411 | [('topic_primary', 'Protein Plasma-Amino-Acid Transport and Metabolism')] + cipher:2573 | 8.4258 | [('topic_primary', 'Carbohydrate Metabolism Disorders')] + ohdsi:1075 | -3.75 | [('topic_mismatch', 'Narcolepsy')] + +CASE 56: patients with a drug exposure to acetaminophen in the hospital setting + ohdsi:1187 | 27.35 | [('topic_primary', 'acetaminophen exposure')] + ohdsi:1158 | -0.75 | [('topic_mismatch', 'Aspirin Exposure')] + ohdsi:1181 | -0.75 | [('topic_mismatch', 'Steroids')] + ohdsi:719 | -9.75 | [('topic_mismatch', 'Hepatic Injury')] + ohdsi:735 | -15.75 | [('topic_mismatch', 'Acute Liver Injury')] + ohdsi:293 | -15.75 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:18446 | -15.8075 | [('topic_mismatch', 'Acute Liver Injury')] + cipher:31254 | -16.8675 | [('topic_mismatch', 'Liver Disease')] + +CASE 57: Patients diagnosed with dyschromia and vitiligo + cipher:13900 | 35.6725 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', 
'{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + cipher:2628 | 35.6325 | [('topic_primary', 'Dyschromia and Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dyschromia and Vitiligo"]}')] + ohdsi:471 | 21.85 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:30727 | 21.6925 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:13901 | 19.6125 | [('topic_primary', 'Vitiligo'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Vitiligo"]}')] + cipher:15590 | 18.1525 | [('topic_primary', 'Dyschromia and Vitiligo')] + cipher:13902 | 13.4458 | [('topic_primary', 'Other Dyschromia')] + cipher:15592 | 5.9458 | [('topic_primary', 'Other dyschromia')] + +CASE 58: Patients with with no pre-existing liver disease who receive a diagnosis of acute hepatic injury + ohdsi:716 | 28.125 | [('topic_primary', 'Acute Hepatic Injury'), ('topic_context', '{"context_conditions": ["Hepatic Failure"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:735 | 26.56 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:294 | 26.04 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure", "Viral Hepatitis", "Alcoholic Liver Disease"], "target_conditions": [... 
[truncated 22 chars]')] + ohdsi:293 | 25.25 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18447 | 25.2125 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + cipher:18446 | 25.1925 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Liver Injury"]}')] + ohdsi:719 | 22.1667 | [('topic_primary', 'Hepatic Injury'), ('topic_context', '{"context_conditions": ["Jaundice", "Liver Disease"], "target_conditions": ["Acute Hepatic Injury"]}')] + ohdsi:736 | 19.52 | [('topic_primary', 'Acute Liver Injury'), ('topic_context', '{"context_conditions": ["Chronic Hepatic Failure"], "target_conditions": ["Acute Liver Injury"]}')] + +CASE 59: A PheCode-based definition of patients with nerve plexus lesions + cipher:3108 | 34.1525 | [('topic_primary', 'Nerve Plexus Lesions'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nerve Plexus Lesions"]}')] + cipher:13084 | 24.7125 | [('topic_primary', 'Nerve Plexus Lesions')] + cipher:13085 | 23.6125 | [('topic_primary', 'Nerve Root Lesions'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Nerve Root Lesions"]}')] + cipher:14974 | 19.1925 | [('topic_primary', 'Nerve Plexus Lesions')] + cipher:13083 | 16.5392 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + cipher:14973 | 8.9992 | [('topic_primary', 'Nerve Root and Plexus Disorders')] + cipher:3109 | 8.9792 | [('topic_primary', 'Nerve root and plexus disorders')] + cipher:14155 | -5.3875 | [('topic_mismatch', 'Thoracic or Lumbosacral Neuritis or Radiculitis')] + +CASE 60: patients with a diagnosis of PRES + ohdsi:223 | 21.95 | [('topic_primary', 'Posterior reversible encephalopathy syndrome (PRES)')] + ohdsi:1075 | -3.67 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:1317 | -3.69 | 
[('topic_mismatch', 'Reye’s syndrome')] + ohdsi:516 | -3.71 | [('topic_mismatch', 'Thrombotic microangiopathy')] + ohdsi:248 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:1084 | -3.75 | [('topic_mismatch', 'Disseminated Intravascular Coagulation (DIC)')] + ohdsi:520 | -3.75 | [('topic_mismatch', 'Hypertensive disorder')] + ohdsi:543 | -3.75 | [('topic_mismatch', 'Seizure')] + +CASE 61: patients with chronic ulcerative colitis + ohdsi:860 | 35.83 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + ohdsi:458 | 35.79 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative colitis"]}')] + cipher:30724 | 35.6125 | [('topic_primary', 'Ulcerative Colitis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Ulcerative Colitis"]}')] + ohdsi:201 | 31.435 | [('topic_primary', 'Ulcerative colitis'), ('topic_context', '{"context_conditions": ["Rectal hemorrhage", "Inflammatory bowel disease", "Complications"], "target_conditions": ["Ulce... [truncated 17 chars]')] + ohdsi:775 | 4.0278 | [('topic_context', '{"context_conditions": ["First IBD Occurrence", "Chronic Ulcerative Proctitis"], "target_conditions": ["Inflammatory Bow... 
[truncated 13 chars]'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1067 | 0.5833 | [('topic_context', '{"context_conditions": ["Ulcerative colitis", "Rectal hemorrhage"], "target_conditions": ["JAK inhibitors"]}'), ('context_without_primary', 'topic only matched context fields')] + cipher:30115 | -3.8875 | [('topic_mismatch', "Crohn's Disease")] + ohdsi:1068 | -10.25 | [('topic_mismatch', 'Tumor Necrosis Factor alpha (TNFa) inhibitors')] + +CASE 62: Veteran patients with developmental disorders that are pervasive + cipher:17197 | 17.6125 | [('topic_primary', 'Developmental Delays and Disorders')] + cipher:17193 | 16.5458 | [('topic_primary', 'Pervasive Developmental Disorders')] + cipher:18933 | 15.7792 | [('topic_primary', 'Mental Health Disorders')] + cipher:17141 | 15.1125 | [('topic_primary', 'Specific Nonpsychotic Mental Disorders')] + cipher:17138 | 7.6325 | [('topic_primary', 'Other Persistent Mental Disorders')] + cipher:18947 | -2.8875 | [('topic_mismatch', 'Substance Use Disorder')] + cipher:30605 | -3.8075 | [('topic_mismatch', 'Autism Spectrum')] + cipher:30166 | -3.8475 | [('topic_mismatch', 'Down Syndrome')] + +CASE 63: patients with at least 2 recorded diagnoses of acute myocardial infarction + ohdsi:510 | 34.27 | [('topic_primary', 'Acute myocardial infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + ohdsi:1081 | 34.25 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:18982 | 34.1925 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:31590 | 34.1125 | [('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + cipher:31275 | 34.1125 | 
[('topic_primary', 'Acute Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute Myocardial Infarction"]}')] + ohdsi:881 | 29.0167 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Acute myocardial infarction"]}')] + cipher:3998 | 27.1725 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + cipher:30748 | 27.1525 | [('topic_primary', 'Myocardial Infarction'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Myocardial Infarction"]}')] + +CASE 64: patients diagnosed with antiphospholipid syndrome who have recieved care in the outpatient setting + ohdsi:632 | 19.35 | [('topic_primary', 'Antiphospholipid syndrome')] + ohdsi:781 | 19.33 | [('topic_primary', 'Antiphospholipid Syndrome')] + ohdsi:738 | 2.77 | [('topic_context', '{"context_conditions": ["Evans syndrome"], "target_conditions": ["Autoimmune hemolytic anemia"]}'), ('context_without_primary', 'topic only matched context fields')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + ohdsi:683 | -5.25 | [('topic_mismatch', 'Antineutrophil cytoplasmic antibody positive vasculitis')] + cipher:31306 | -5.3875 | [('topic_mismatch', 'Atrial Fibrillation')] + cipher:31284 | -12.8875 | [('topic_mismatch', 'Thrombophilia')] + cipher:31215 | -12.8875 | [('topic_mismatch', 'Thrombophilia')] + +CASE 65: older adults with a likely diagnosis of ADRD or late-stage dementia + ohdsi:651 | 18.95 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:864 | 18.95 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:927 | 18.95 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31120 | 
18.9125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:31241 | 18.8325 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + cipher:30112 | 18.8125 | [('topic_primary', 'Dementia'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Dementia"]}')] + ohdsi:33 | 15.45 | [('topic_primary', 'Dementia')] + cipher:31843 | 10.9375 | [('topic_primary', "Alzheimer's Disease, Late Onset"), ('topic_context', '{"context_conditions": [], "target_conditions": ["Alzheimer\'s Disease, Late Onset"]}')] + +CASE 66: patients who experienced a GI bleed adverse event + ohdsi:482 | 27.35 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal hemorrhage', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal hemorrhage'}), ('dynamic_clinical_alias_context', {'alias': 'gastrointestinal hemorrhage', 'field': 'target_vs_context_conditions'})] + ohdsi:888 | 27.25 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding'}), ('dynamic_clinical_alias_context', {'alias': 'gastrointestinal bleeding', 'field': 'target_vs_context_conditions'})] + ohdsi:417 | 25.25 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding'}), ('dynamic_clinical_alias_context', {'alias': 'gastrointestinal bleeding', 'field': 'target_vs_context_conditions'})] + ohdsi:349 | 21.29 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding'})] + ohdsi:77 | 21.25 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding'})] + cipher:16289 | 18.1125 | [('dynamic_clinical_alias_match', {'alias': 
'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Bleeding'}), ('dynamic_clinical_alias_context', {'alias': 'gastrointestinal bleeding', 'field': 'target_vs_context_conditions'})] + ohdsi:1197 | 17.27 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding'})] + ohdsi:299 | 15.81 | [('dynamic_clinical_alias_match', {'alias': 'gastrointestinal bleeding', 'field': 'primary_clinical_topic', 'topic': 'Gastrointestinal Bleeding and Perforation'})] + +CASE 67: patients who received a COVID-19 diagnosis in the outpatient setting + ohdsi:678 | 34.35 | [('topic_primary', 'COVID-19'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COVID-19"]}')] + cipher:31308 | 30.5775 | [('topic_primary', 'Confirmed COVID-19'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Confirmed COVID-19"]}')] + cipher:31276 | 24.4458 | [('topic_primary', 'COVID-19 Infection'), ('topic_context', '{"context_conditions": ["COVID-19"], "target_conditions": []}')] + ohdsi:47 | 18.75 | [('topic_primary', 'COVID-19 diagnosis'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COVID-19 diagnosis"]}')] + ohdsi:59 | 17.9967 | [('topic_primary', 'COVID-19 Diagnosis'), ('topic_context', '{"context_conditions": ["SARS-CoV-2 test"], "target_conditions": ["COVID-19 diagnosis"]}')] + ohdsi:44 | 17.1433 | [('topic_primary', 'COVID-19 Infection')] + ohdsi:1085 | -5.25 | [('topic_mismatch', 'Appendicitis')] + ohdsi:1075 | -5.25 | [('topic_mismatch', 'Narcolepsy')] + +CASE 68: veterans who experienced an abdominal aortic aneurysm + ohdsi:1290 | -2.9167 | [('topic_primary', 'Aortic Repair')] + ohdsi:866 | -2.9167 | [('topic_primary', 'Aortic Repair')] + ohdsi:1314 | -20.19 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:1291 | -20.21 | [('topic_mismatch', 'Bypass Surgery')] + ohdsi:1102 | -20.25 | [('topic_mismatch', 'Coronary 
Artery Bypass Graft Surgery')] + ohdsi:867 | -20.25 | [('topic_mismatch', 'Lower Extremity Bypass')] + ohdsi:1299 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + ohdsi:875 | -20.25 | [('topic_mismatch', 'Coronary Artery Bypass Graft')] + +CASE 69: patients with COPD according to diagnostic codes in the EHR + cipher:29794 | 35.6925 | [('topic_primary', 'COPD'), ('topic_context', '{"context_conditions": [], "target_conditions": ["COPD"]}')] + cipher:29756 | 21.7725 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:31297 | 21.7525 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + ohdsi:1192 | 17.85 | [('topic_primary', 'Chronic Obstructive Pulmonary Disease (COPD)')] + cipher:29553 | -3.8875 | [('topic_mismatch', 'Sleep Apnea')] + cipher:29212 | -3.8875 | [('topic_mismatch', 'Bronchiestasis')] + cipher:29755 | -3.8875 | [('topic_mismatch', 'Asthma')] + cipher:30170 | -3.8875 | [('topic_mismatch', 'Diabetes Mellitus')] + +CASE 70: patients hospitalized at least once for heart failure + cipher:16152 | 37.6125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:30612 | 37.6125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:934 | 36.9167 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": ["Hypertension"], "target_conditions": ["Heart Failure"]}')] + ohdsi:1303 | 34.33 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:30106 | 34.1125 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + cipher:16291 | 30.1725 | [('topic_primary', 'Heart Failure Rehospitalization'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart 
Failure Rehospitalization"]}')] + ohdsi:979 | 27.35 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + ohdsi:938 | 26.77 | [('topic_primary', 'Heart Failure'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Heart Failure"]}')] + +CASE 71: patients who appear to have diabetes based on a medication-based phenotype + cipher:31250 | 25.1925 | [('topic_primary', 'Diabetes')] + cipher:31195 | 25.1125 | [('topic_primary', 'Diabetes')] + cipher:30170 | 24.9625 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Type 1 Diabetes", "Type 2 Diabetes"]}')] + cipher:3760 | 19.3375 | [('topic_primary', 'Type 1 diabetes with ophthalmic manifestations'), ('topic_context', '{"context_conditions": ["ophthalmic manifestations"], "target_conditions": ["Type 1 diabetes"]}')] + cipher:31171 | 16.6125 | [('topic_primary', 'Diabetes')] + cipher:30759 | 16.3825 | [('topic_primary', 'Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Diabetes Mellitus"]}')] + cipher:31161 | 16.3625 | [('topic_primary', 'Diabetes (Type 2)'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Diabetes (Type 2)"]}')] + cipher:12725 | 14.6125 | [('topic_primary', 'Secondary Diabetes Mellitus'), ('topic_context', '{"context_conditions": [], "target_conditions": ["Secondary Diabetes Mellitus"]}')] + diff --git a/dodo.py b/dodo.py index 7136ade..0f3ca6d 100644 --- a/dodo.py +++ b/dodo.py @@ -363,6 +363,56 @@ def _run_smoke() -> None: } +def task_smoke_cohort_methods_specs_recommend_flow(): + def _run_smoke() -> None: + env = os.environ.copy() + if not env.get("LLM_API_KEY"): + print("Missing LLM_API_KEY in environment. 
Set it before running this task.") + return + for key, value in DEFAULT_ENV.items(): + env.setdefault(key, value) + if not env.get("STUDY_AGENT_MCP_URL"): + env.setdefault("STUDY_AGENT_MCP_COMMAND", "study-agent-mcp") + env.setdefault("STUDY_AGENT_MCP_ARGS", "") + env.setdefault("LLM_LOG", "1") + env.setdefault("LLM_LOG_PROMPT", "1") + env.setdefault("LLM_LOG_RESPONSE", "1") + env["ACP_URL"] = "http://127.0.0.1:8765/flows/cohort_methods_specifications_recommendation" + + acp_stdout = env.get("ACP_STDOUT", "/tmp/study_agent_acp_stdout.log") + acp_stderr = env.get("ACP_STDERR", "/tmp/study_agent_acp_stderr.log") + mcp_proc = _start_mcp_http_if_needed(env) + print("Starting ACP...") + with open(acp_stdout, "w", encoding="utf-8") as out, open(acp_stderr, "w", encoding="utf-8") as err: + acp_proc = subprocess.Popen(["study-agent-acp"], env=env, stdout=out, stderr=err) + try: + print("Waiting for ACP health endpoint...") + require_mcp = bool(env.get("STUDY_AGENT_MCP_URL") or env.get("STUDY_AGENT_MCP_COMMAND")) + _wait_for_acp("http://127.0.0.1:8765/health", timeout_s=30, require_mcp=require_mcp) + print("Running cohort-methods-specs flow smoke test...") + subprocess.run(["python", "tests/cohort_methods_specs_flow_smoke_test.py"], check=True, env=env) + print(f"ACP logs: {acp_stdout} {acp_stderr}") + finally: + print("Stopping ACP...") + acp_proc.terminate() + try: + acp_proc.wait(timeout=10) + except subprocess.TimeoutExpired: + acp_proc.kill() + if mcp_proc is not None: + print("Stopping MCP...") + mcp_proc.terminate() + try: + mcp_proc.wait(timeout=10) + except subprocess.TimeoutExpired: + mcp_proc.kill() + + return { + "actions": [_run_smoke], + "verbosity": 2, + } + + def task_smoke_phenotype_intent_split_flow(): def _run_smoke() -> None: env = os.environ.copy() @@ -413,6 +463,56 @@ def _run_smoke() -> None: } +def task_smoke_cohort_methods_intent_split_flow(): + def _run_smoke() -> None: + env = os.environ.copy() + if not env.get("LLM_API_KEY"): + print("Missing 
LLM_API_KEY in environment. Set it before running this task.") + return + for key, value in DEFAULT_ENV.items(): + env.setdefault(key, value) + if not env.get("STUDY_AGENT_MCP_URL"): + env.setdefault("STUDY_AGENT_MCP_COMMAND", "study-agent-mcp") + env.setdefault("STUDY_AGENT_MCP_ARGS", "") + env.setdefault("LLM_LOG", "1") + env.setdefault("LLM_LOG_PROMPT", "1") + env.setdefault("LLM_LOG_RESPONSE", "1") + env["ACP_URL"] = "http://127.0.0.1:8765/flows/cohort_methods_intent_split" + + acp_stdout = env.get("ACP_STDOUT", "/tmp/study_agent_acp_stdout.log") + acp_stderr = env.get("ACP_STDERR", "/tmp/study_agent_acp_stderr.log") + mcp_proc = _start_mcp_http_if_needed(env) + print("Starting ACP...") + with open(acp_stdout, "w", encoding="utf-8") as out, open(acp_stderr, "w", encoding="utf-8") as err: + acp_proc = subprocess.Popen(["study-agent-acp"], env=env, stdout=out, stderr=err) + try: + print("Waiting for ACP health endpoint...") + require_mcp = bool(env.get("STUDY_AGENT_MCP_URL") or env.get("STUDY_AGENT_MCP_COMMAND")) + _wait_for_acp("http://127.0.0.1:8765/health", timeout_s=30, require_mcp=require_mcp) + print("Running cohort methods intent split flow smoke test...") + subprocess.run(["python", "tests/cohort_methods_intent_split_smoke_test.py"], check=True, env=env) + print(f"ACP logs: {acp_stdout} {acp_stderr}") + finally: + print("Stopping ACP...") + acp_proc.terminate() + try: + acp_proc.wait(timeout=10) + except subprocess.TimeoutExpired: + acp_proc.kill() + if mcp_proc is not None: + print("Stopping MCP...") + mcp_proc.terminate() + try: + mcp_proc.wait(timeout=10) + except subprocess.TimeoutExpired: + mcp_proc.kill() + + return { + "actions": [_run_smoke], + "verbosity": 2, + } + + def task_smoke_phenotype_improvements_flow(): def _run_smoke() -> None: env = os.environ.copy() diff --git a/environment.yml b/environment.yml index e40b1bc..86e2266 100644 --- a/environment.yml +++ b/environment.yml @@ -17,3 +17,5 @@ dependencies: - sqlalchemy - ruff - requests + - 
ruff + - sqlalchemy diff --git a/mcp_server/prompts/case_causal_review/overview_case_causal_review.md b/mcp_server/prompts/case_causal_review/overview_case_causal_review.md index 158ca0e..13e3254 100644 --- a/mcp_server/prompts/case_causal_review/overview_case_causal_review.md +++ b/mcp_server/prompts/case_causal_review/overview_case_causal_review.md @@ -1,6 +1,6 @@ Task: `case_causal_review`. -You are reviewing a de-identified canonical case row prepared upstream by pv-copilot. +You are reviewing a de-identified canonical case row prepared upstream by a pharmacovigilance system. The adverse event is assumed to have occurred already. Your job is to rank only candidate items that could plausibly have contributed causally. Use context items, case metadata, semantic subroles, and compact annotations to support reasoning. diff --git a/mcp_server/prompts/case_causal_review/system_prompt_case_causal_review.md b/mcp_server/prompts/case_causal_review/system_prompt_case_causal_review.md index 3eb7bfa..5b83fcd 100644 --- a/mcp_server/prompts/case_causal_review/system_prompt_case_causal_review.md +++ b/mcp_server/prompts/case_causal_review/system_prompt_case_causal_review.md @@ -1,6 +1,6 @@ Act as a clinician performing causal review on a de-identified canonical case row. The adverse event under review is {adverse_event_name}. -The row came from source type {source_type} after upstream shaping by pv-copilot. +The row came from source type {source_type} after upstream shaping by a pharmacovigilance system. Rank only observed candidate items already present in the supplied case row. Treat context items and case metadata as supporting evidence only. Do not rank the index event, and do not force a single cause. 
diff --git a/mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md b/mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md new file mode 100644 index 0000000..8ebd6b0 --- /dev/null +++ b/mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md @@ -0,0 +1,179 @@ +# CohortMethod cmAnalysis Template v1.4.0 Review Copy + +This is a review copy of `CM_ANALYSIS_TEMPLATE.md` with legacy field +descriptions merged where they match fields in the current +`cmAnalysis_template.json`. + +Fields marked with `Needs review` exist in the current template but did not have +a matching description in the provided legacy text. + +## Top-Level Shape + +- `description`: analytic settings profile name. +- `getDbCohortMethodDataArgs`: settings used when extracting CohortMethod data. +- `createStudyPopArgs`: settings used to define the study population. +- `trimByPsArgs`: propensity-score trimming settings, or `null`. +- `matchOnPsArgs`: propensity-score matching settings, or `null`. +- `stratifyByPsArgs`: propensity-score stratification settings, or `null`. +- `createPsArgs`: propensity-score model settings, or `null`. +- `fitOutcomeModelArgs`: outcome model settings. + +## Field Notes + +### `getDbCohortMethodDataArgs` + +- `studyStartDate`, `studyEndDate`: date strings in `yyyyMMdd` format, or blank + strings when not restricted. + - Meaning: Study start and end dates can be used to limit the analyses to a + specific period. The study end date also truncates risk windows, meaning no + outcomes beyond the study end date will be considered. Leave blank to use + all time. +- `firstExposureOnly`: `true` or `false`. + - Meaning: Can be used to restrict to the first exposure per patient. +- `removeDuplicateSubjects`: one of `keep all`, `keep first`, `remove all`, + or `keep first, truncate to second`. + - Meaning: What happens when a subject is in both target and comparator + cohorts. `keep all` keeps subjects in both cohorts, which can double-count + subjects and outcomes. 
`keep first` keeps the subject in the first cohort + that occurred. `remove all` removes the subject from both cohorts. + `keep first, truncate to second` behaves like `keep first`, but the first cohort is truncated to end before the second cohort starts. +- `restrictToCommonPeriod`: `true` or `false`. + - Meaning: Should the study be restricted to the period when both exposures + are present, such as when both drugs are on the market? +- `washoutPeriod`: non-negative integer number of days. + - Meaning: The minimum required continuous observation time prior to index + date for a person to be included in the cohort. +- `maxCohortSize`: non-negative integer; `0` means no maximum. + - Meaning: If either the target or the comparator cohort is larger than this number it will be sampled to this size. + +### `createStudyPopArgs` + +- `removeSubjectsWithPriorOutcome`: `true` or `false`. + - Meaning: We can choose to remove subjects that have the outcome prior to + the risk window start. +- `priorOutcomeLookback`: non-negative integer lookback window. + - Meaning: If we choose to remove people who had the outcome before, this + controls how many days to look back when identifying prior outcomes. +- `minDaysAtRisk`: non-negative integer. + - Meaning: A patient with zero days at risk adds no information, so the minimum days at risk is normally set at one day. If there is a known latency for the side effect, then this may be increased to get a more informative proportion. +- `riskWindowStart`, `riskWindowEnd`: integer offsets from the selected anchor. + - Meaning: Time-at-risk can start one day after cohort start, so one day + after treatment initiation. Starting later than cohort start can exclude + outcome events on the day of treatment initiation when they are not + considered biologically plausible consequences of the drug. Time-at-risk + can end at cohort end, meaning when exposure stops, for an on-treatment + design. It can also end at a fixed duration after cohort entry regardless + of whether exposure continues, for an intent-to-treat design. 
A very large + end offset, such as 99999 days after cohort entry, effectively follows + subjects until observation end. +- `startAnchor`, `endAnchor`: one of `cohort start` or `cohort end`. + - Meaning: The anchors determine whether the risk-window offsets are measured + from cohort start or cohort end. +- `censorAtNewRiskWindow`: `true` or `false`. + - Meaning: If duplicate-subject options such as `keep all` or `keep first` + are selected, we may wish to censor the time when a person is in both + cohorts. + +### `trimByPsArgs` +- (default) Use `null` when no PS trimming is selected. + - Meaning: We can opt to trim the study population, removing people with extreme PS values. We can choose to remove the top and bottom percentage, or we can remove subjects whose preference score falls outside the range we specify. Trimming the cohorts is generally not recommended because it requires discarding observations, which reduces statistical power. It may be desirable to trim in some cases, for example when using IPTW. +- `trimFraction`: For percent trimming (a fraction, so 5 percent is represented as `0.05`). Set to `null` when using equipoise trimming. +- `equipoiseBounds`: For equipoise trimming. Set `null` if percent trimming. + +### `matchOnPsArgs` + +- Use an object only when matching on propensity score. Use `null` when stratifying by PS or when no PS adjustment is selected. + - Meaning: We can choose to match on the propensity score. When matching, + specify the maximum number of people from the comparator group to match to + each person in the target group, and specify the caliper. +- `maxRatio`: a non-negative integer; `0` means no maximum. + - Meaning: The maximum number of people from the comparator group to match to + each person in the target group. Typical values are `1` for one-to-one + matching or a large number, such as `100`, for variable-ratio matching. +- `caliper`: numeric; `0` means no caliper is used. 
+ - Meaning: The maximum allowed difference between propensity scores to allow + a match. +- `caliperScale`: one of `propensity score`, `standardized`, or `standardized logit`. + - Meaning: The caliper can be defined on the propensity score scale, the + standardized scale in standard deviations of the propensity score + distributions, or the standardized logit scale in standard deviations after + logit transformation to make the propensity score more normally + distributed. + +### `stratifyByPsArgs` + +- Use an object only when stratifying by propensity score. Use `null` when matching on PS or when no PS adjustment is selected. + - Meaning: We can choose to stratify on the propensity score. +- `numberOfStrata`: a positive integer. + - Meaning: When stratifying, specify the number of strata. +- `baseSelection`: one of `all`, `target`, or `comparator`. + - Meaning: When stratifying, specify whether strata are based on the target, + comparator, or entire study population. + +### `createPsArgs` + +- Use `null` when no PS model is needed. +- `maxCohortSizeForFitting`: a non-negative integer; `0` means no downsampling. + - Meaning: The maximum number of people to include in the propensity score + model when fitting. +- `errorOnHighCorrelation`: `true` or `false`. + - Meaning: If any covariate has an unusually high correlation, either + positive or negative, this will throw an error. +- `prior` and `control` are `null` when regularization is disabled. +- `prior.priorType`: currently `laplace`. + - Meaning: Specify the prior distribution. +- `prior.useCrossValidation`: `true` or `false`. + - Meaning: Perform cross-validation to determine prior variance. +- `control.cvType`: `auto` or `grid`. + - Meaning: Cross-validation search type. +- `control.noiseLevel`: `silent`, `quiet`, or `noisy`. + - Meaning: Noise level for Cyclops screen output. +- `control.startingVariance`: numeric; `-1` means estimate from data. + - Meaning: Starting variance for auto-search cross-validation. 
`-1` means use + an estimate based on the data. +- `control.tolerance`: numeric convergence tolerance. + - Meaning: Maximum relative change in convergence criterion from successive + iterations. +- `control.fold`: number of random folds. + - Meaning: Number of random folds to employ in cross-validation. +- `control.cvRepetitions`: number of cross-validation repetitions. + - Meaning: Number of repetitions of cross-validation. +- `control.resetCoefficients`: `true` or `false`. + - Meaning: Reset all coefficients to 0 between model fits under + cross-validation. + +### `fitOutcomeModelArgs` + +- `modelType`: one of `logistic`, `poisson`, or `cox`. + - Meaning: The statistical model used to estimate the relative risk of the + outcome between target and comparator cohorts. +- `stratified`: `true` or `false`. + - Meaning: Whether the regression should be conditioned on the strata. For + one-to-one matching this is likely unnecessary and may lose power. For + stratification or variable-ratio matching it is required. +- `useCovariates`: `true` or `false`. + - Meaning: Covariates can be added to the outcome model to adjust the + analysis. The recommended default is to keep the outcome model as simple as + possible and not include additional covariates. +- `inversePtWeighting`: `true` or `false`. + - Meaning: Instead of stratifying or matching on the propensity score, inverse + probability of treatment weighting can be used. +- `prior` and `control` are `null` when regularization is disabled. +- `prior.priorType`: currently `laplace`. + - Meaning: Specify the prior distribution. +- `prior.useCrossValidation`: `true` or `false`. + - Meaning: Perform cross-validation to determine prior variance. +- `control` follows the same field conventions as `createPsArgs.control`. 
+ - Meaning: The outcome-model control object uses the same conventions for + tolerance, cross-validation type, folds, repetitions, noise level, + coefficient reset, and starting variance as `createPsArgs.control`. + +## Generation Rules + +- Matching and stratification are mutually exclusive: + - `matchOnPsArgs` object and `stratifyByPsArgs = null`, or + - `matchOnPsArgs = null` and `stratifyByPsArgs` object, or + - both `null` when no PS adjustment is selected. +- If trimming is selected without matching or stratification, `createPsArgs` + should still be present because PS values are required for trimming. +- The generated artifact should be valid JSON with no comments. diff --git a/mcp_server/prompts/cohort_methods/cmAnalysis_template.json b/mcp_server/prompts/cohort_methods/cmAnalysis_template.json new file mode 100644 index 0000000..ac3b96b --- /dev/null +++ b/mcp_server/prompts/cohort_methods/cmAnalysis_template.json @@ -0,0 +1,65 @@ +{ + "description": "", + "getDbCohortMethodDataArgs": { + "studyStartDate": "", + "studyEndDate": "", + "firstExposureOnly": false, + "removeDuplicateSubjects": "keep all", + "restrictToCommonPeriod": false, + "washoutPeriod": 365, + "maxCohortSize": 0 + }, + "createStudyPopArgs": { + "removeSubjectsWithPriorOutcome": true, + "priorOutcomeLookback": 99999, + "minDaysAtRisk": 1, + "riskWindowStart": 1, + "startAnchor": "cohort start", + "riskWindowEnd": 0, + "endAnchor": "cohort end", + "censorAtNewRiskWindow": false + }, + "trimByPsArgs": null, + "matchOnPsArgs": { + "maxRatio": 1, + "caliper": 0.2, + "caliperScale": "standardized logit" + }, + "stratifyByPsArgs": null, + "createPsArgs": { + "maxCohortSizeForFitting": 250000, + "errorOnHighCorrelation": true, + "prior": { + "priorType": "laplace", + "useCrossValidation": true + }, + "control": { + "tolerance": 2e-7, + "cvType": "auto", + "fold": 10, + "cvRepetitions": 10, + "noiseLevel": "silent", + "resetCoefficients": true, + "startingVariance": 0.01 + } + }, + 
"fitOutcomeModelArgs": { + "modelType": "cox", + "stratified": false, + "useCovariates": false, + "inversePtWeighting": false, + "prior": { + "priorType": "laplace", + "useCrossValidation": true + }, + "control": { + "tolerance": 2e-7, + "cvType": "auto", + "fold": 10, + "cvRepetitions": 10, + "noiseLevel": "quiet", + "resetCoefficients": true, + "startingVariance": 0.01 + } + } +} diff --git a/mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md b/mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md new file mode 100644 index 0000000..fa7fc9b --- /dev/null +++ b/mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md @@ -0,0 +1,13 @@ + +From the provided , extract the key information and update the + JSON to configure a population-level +estimation study using the OMOP-CDM. +Leave any settings at their default values if they are not specified in the . +Refer to the fields and value types provided in the +and do not add any additional fields. +For each fields, refer to to ensure accurate mapping of the relevant information from to the corresponding JSON structure. +For each analytic settings section used by the R shell +(study_population, time_at_risk, propensity_score_adjustment, outcome_model), +provide a brief rationale and a confidence rating (high | medium | low). +Follow the exactly. + diff --git a/mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md b/mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md new file mode 100644 index 0000000..24045a4 --- /dev/null +++ b/mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md @@ -0,0 +1,15 @@ + +Return exactly one fenced JSON block with the shape: +```json +{ + "specifications": { ... full updated cmAnalysis spec ... 
}, + "sectionRationales": { + "study_population": { "rationale": "...", "confidence": "high|medium|low" }, + "time_at_risk": { "rationale": "...", "confidence": "high|medium|low" }, + "propensity_score_adjustment": { "rationale": "...", "confidence": "high|medium|low" }, + "outcome_model": { "rationale": "...", "confidence": "high|medium|low" } + } +} +``` +No text outside the fenced block. + diff --git a/mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json b/mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json new file mode 100644 index 0000000..a42ecc2 --- /dev/null +++ b/mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "cohort_methods_intent_split_output", + "type": "object", + "properties": { + "status": {"type": "string", "enum": ["ok", "needs_clarification"]}, + "plan": {"type": "string"}, + "target_statement": {"type": "string"}, + "comparator_statement": {"type": "string"}, + "outcome_statement": {"type": "string"}, + "outcome_statements": { + "type": "array", + "items": {"type": "string"} + }, + "rationale": {"type": "string"}, + "questions": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["status", "plan", "target_statement", "comparator_statement", "outcome_statement", "outcome_statements", "rationale"], + "additionalProperties": false +} diff --git a/mcp_server/prompts/phenotype/output_schema_phenotype_index_keywords.json b/mcp_server/prompts/phenotype/output_schema_phenotype_index_keywords.json new file mode 100644 index 0000000..93f91e9 --- /dev/null +++ b/mcp_server/prompts/phenotype/output_schema_phenotype_index_keywords.json @@ -0,0 +1,13 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "phenotype_index_keywords_output", + "type": "object", + "properties": { + "retrieval_keywords": { + "type": "array", + "items": { "type": "string" 
} + } + }, + "required": ["retrieval_keywords"], + "additionalProperties": false +} diff --git a/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_intent_facets.json b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_intent_facets.json new file mode 100644 index 0000000..1da0ec3 --- /dev/null +++ b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_intent_facets.json @@ -0,0 +1,73 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "phenotype_recommendation_intent_facets_output", + "type": "object", + "properties": { + "plan": { + "type": "string" + }, + "intent_facets": { + "type": "object", + "properties": { + "condition_or_topic": { + "type": "string" + }, + "phenotype_role": { + "type": "string" + }, + "care_setting": { + "type": "string" + }, + "population_cue": { + "type": "string" + }, + "validation_preference": { + "type": "string" + }, + "executability_preference": { + "type": "string" + }, + "geography_coding_preference": { + "type": "string" + }, + "role_cues": { + "type": "array", + "items": { + "type": "string" + } + }, + "care_setting_cues": { + "type": "array", + "items": { + "type": "string" + } + }, + "population_cues": { + "type": "array", + "items": { + "type": "string" + } + }, + "clinical_topic_aliases": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": true + }, + "reasoning_notes": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "plan", + "intent_facets", + "reasoning_notes" + ], + "additionalProperties": false +} diff --git a/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_plan.json b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_plan.json new file mode 100644 index 0000000..b8dbeed --- /dev/null +++ b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendation_plan.json @@ -0,0 +1,32 @@ +{ + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "title": "phenotype_recommendation_plan_output", + "type": "object", + "properties": { + "plan": { "type": "string" }, + "intent_facets": { + "type": "object", + "properties": { + "condition_or_topic": { "type": "string" }, + "phenotype_role": { "type": "string" }, + "care_setting": { "type": "string" }, + "population_cue": { "type": "string" }, + "validation_preference": { "type": "string" }, + "executability_preference": { "type": "string" }, + "geography_coding_preference": { "type": "string" } + }, + "additionalProperties": true + }, + "shortlist_ids": { + "type": "array", + "items": { "type": "string" } + }, + "needs_more_search": { "type": "boolean" }, + "reasoning_notes": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["plan", "intent_facets", "shortlist_ids", "needs_more_search", "reasoning_notes"], + "additionalProperties": false +} diff --git a/mcp_server/prompts/phenotype/output_schema_phenotype_recommendations.json b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendations.json index c6a7fb8..5885fdf 100644 --- a/mcp_server/prompts/phenotype/output_schema_phenotype_recommendations.json +++ b/mcp_server/prompts/phenotype/output_schema_phenotype_recommendations.json @@ -9,12 +9,12 @@ "items": { "type": "object", "properties": { - "cohortId": { "type": "integer" }, - "cohortName": { "type": "string" }, + "phenotype_id": { "type": "string" }, + "phenotype_name": { "type": "string" }, "justification": { "type": "string" }, "confidence": { "type": ["number", "null"] } }, - "required": ["cohortId", "cohortName", "justification"] + "required": ["phenotype_id", "phenotype_name", "justification"] } } }, diff --git a/mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md b/mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md new file mode 100644 index 0000000..e0516ff --- /dev/null +++ 
b/mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md @@ -0,0 +1,3 @@ +You are helping a clinical researcher translate a comparative cohort method study intent into three cohort statements: +one for the target exposure cohort, one for the comparator exposure cohort, and one for the outcome event cohort. +These statements should be short, clear, and suitable for searching the OHDSI phenotype library. diff --git a/mcp_server/prompts/phenotype/overview_phenotype.md b/mcp_server/prompts/phenotype/overview_phenotype.md index 6af0d08..5545f2a 100644 --- a/mcp_server/prompts/phenotype/overview_phenotype.md +++ b/mcp_server/prompts/phenotype/overview_phenotype.md @@ -2,4 +2,4 @@ You are the OHDSI Assistant (ACP Model) for phenotype tools. - Return ONLY valid JSON (no prose/markdown/fences). - Tasks: `phenotype_recommendations`, `phenotype_improvements`. - Keep outputs within size limits noted in the tool spec. -Do not invent cohortIds; use only allowed lists provided. +Do not invent phenotype_ids; use only allowed lists provided. diff --git a/mcp_server/prompts/phenotype/overview_phenotype_index_keywords.md b/mcp_server/prompts/phenotype/overview_phenotype_index_keywords.md new file mode 100644 index 0000000..e59738c --- /dev/null +++ b/mcp_server/prompts/phenotype/overview_phenotype_index_keywords.md @@ -0,0 +1,5 @@ +Task: `phenotype_index_keywords`. + +You are deriving compact retrieval keywords for phenotype indexing. +The output will be stored as an index optimization artifact, not as authoritative source metadata. +Stay grounded in the supplied phenotype metadata and prefer concise clinically meaningful phrases. 
diff --git a/mcp_server/prompts/phenotype/overview_phenotype_recommendation_intent_facets.md b/mcp_server/prompts/phenotype/overview_phenotype_recommendation_intent_facets.md new file mode 100644 index 0000000..7a6d378 --- /dev/null +++ b/mcp_server/prompts/phenotype/overview_phenotype_recommendation_intent_facets.md @@ -0,0 +1,5 @@ +You are the OHDSI Assistant (ACP Model) for phenotype recommendation intent interpretation. +- Return ONLY valid JSON (no prose/markdown/fences). +- Task: `phenotype_recommendation_intent_facets`. +- Infer intent facets from the study intent only; no phenotype candidate list is available in this step. +- Keep outputs within the size limits noted in the tool spec. diff --git a/mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md b/mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md new file mode 100644 index 0000000..e4b56f7 --- /dev/null +++ b/mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md @@ -0,0 +1,25 @@ +Return JSON that matches the output schema. +- Set status to "ok" when the study intent clearly supports all three cohort statements. +- Set status to "needs_clarification" when the target, comparator, outcome, or comparison framing is underspecified. +- Provide a short plan for how you derived the cohort statements. +- Provide a concise target cohort statement (index cohort) based on the study intent. +- Provide a concise comparator cohort statement (comparison cohort) based on the study intent. +- Provide a concise outcome cohort statement (event cohort) based on the study intent. +- Provide one or more concise outcome cohort statements in outcome_statements based on the study intent. +- Set outcome_statement to the first or primary outcome statement for compatibility. +- Include a brief rationale that connects the statements to the study intent. +- Include 1-3 clarifying questions when status is "needs_clarification". 
+ +Use this guidance for the statements: +- Target cohort: "If you were designing an observational retrospective study with the following study intent, + what would be the target cohort for the study? In other words, the subset of the sampling frame for which + you would define an index date which would be distinct from the outcome cohort, which would be the persons + who had the event of interest." +- Comparator cohort: "If you were designing an observational retrospective cohort method study with the following + study intent, what would be the comparator cohort for the study? In other words, the subset of the sampling frame + for which you would define an index date for an alternative exposure or reference group to compare against the + target cohort." +- Outcome cohort: "If you were designing an observational retrospective study with the following study intent, + what would be the outcome cohort for the study? In other words, the subset of the sampling frame for which + you would define an event of interest that would likely occur after the index date for persons in a target + or comparator cohort." diff --git a/mcp_server/prompts/phenotype/spec_phenotype_index_keywords.md b/mcp_server/prompts/phenotype/spec_phenotype_index_keywords.md new file mode 100644 index 0000000..b9d12b6 --- /dev/null +++ b/mcp_server/prompts/phenotype/spec_phenotype_index_keywords.md @@ -0,0 +1,17 @@ +Output contract: +{ + "retrieval_keywords": ["string"] +} + +### HEURISTICS/RULES +For `phenotype_index_keywords` +- Return 6 to 12 short keyword phrases unless the phenotype metadata is too sparse. +- Prefer disease, syndrome, clinical focus, population, setting, code-family, and methodology cues. +- Each keyword should usually be 1 to 4 words. Acronyms are allowed. +- Avoid stop words, generic filler, and full-sentence fragments. +- Do not invent unsupported facts. +- Use supplied source metadata, concept labels, and methodology cues when helpful. 
+ +Constraints: +- JSON only; no markdown/fences. +- Keep output compact. diff --git a/mcp_server/prompts/phenotype/spec_phenotype_recommendation_intent_facets.md b/mcp_server/prompts/phenotype/spec_phenotype_recommendation_intent_facets.md new file mode 100644 index 0000000..15cb0bd --- /dev/null +++ b/mcp_server/prompts/phenotype/spec_phenotype_recommendation_intent_facets.md @@ -0,0 +1,42 @@ +Tool: phenotype_recommendation_intent_facets +Output contract: +{ + "plan": "string <=300 chars", + "intent_facets": { + "condition_or_topic": "string", + "clinical_topic_aliases": ["string <=60 chars"], + "phenotype_role": "diagnosis|outcome|screening|severity|procedure|medication_based|risk_score|mixed|unknown", + "care_setting": "outpatient|inpatient|ed|any|unknown", + "population_cue": "string", + "validation_preference": "required|preferred|not_specified", + "executability_preference": "prefer_native_ohdsi|allow_translation|not_specified", + "geography_coding_preference": "us_omop|uk_read|va|not_specified", + "role_cues": ["string <=40 chars"], + "care_setting_cues": ["string <=40 chars"], + "population_cues": ["string <=40 chars"] + }, + "reasoning_notes": ["string <=160 chars"] +} + +### HEURISTICS/RULES +For `phenotype_recommendation_intent_facets` +- Infer intent facets from the study intent only. +- Do not use candidate phenotypes because none are provided in this step. +- Preserve the user disease/topic faithfully; do not broaden it to related comorbidities or outcomes. +- Normalize specific wording into the canonical facet categories when the user intent clearly implies them. +- Collapse narrow lexical items into broader semantic cues when appropriate. +- Examples: insulin, metformin, GLP-1 agonist, sulfonylurea -> medication/drug cue; clinic, office, ambulatory -> outpatient cue; CABG, repair, postoperative -> procedure cue. 
+- Populate `role_cues`, `care_setting_cues`, and `population_cues` with short normalized cue labels that explain why the canonical facet was chosen. +- Prefer broad semantic cue labels over copying raw surface forms verbatim. +- Inside `intent_facets`, include optional `clinical_topic_aliases` when the study intent uses an abbreviation, acronym, shorthand, colloquial clinical phrase, or alternate wording that could map to a more standard disease/topic name. +- `clinical_topic_aliases` must be a short array of strings with at most 5 items. +- Include only exact abbreviation expansions or near-synonymous phrasings of the same main condition/topic. +- Do not include broader diseases, narrower complications, procedures, treatments, biomarkers, or speculative related concepts in `clinical_topic_aliases`. +- If the topic is already standard and unambiguous, `clinical_topic_aliases` may be empty. +- Good examples: ADRD -> Alzheimer's disease, Dementia; GI bleed -> Gastrointestinal bleeding, Gastrointestinal hemorrhage; COPD -> Chronic obstructive pulmonary disease. +- Use `unknown` or `not_specified` when the intent does not support a stronger claim. +- Keep reasoning sparse and grounded in the wording of the user intent. + +Constraints: +- JSON only; no markdown/fences. +- Keep output < 8 KB. 
diff --git a/mcp_server/prompts/phenotype/spec_phenotype_recommendation_plan.md b/mcp_server/prompts/phenotype/spec_phenotype_recommendation_plan.md new file mode 100644 index 0000000..35f9a5c --- /dev/null +++ b/mcp_server/prompts/phenotype/spec_phenotype_recommendation_plan.md @@ -0,0 +1,32 @@ +Tool: phenotype_recommendation_plan +Output contract: +{ + "plan": "string <=300 chars", + "intent_facets": { + "condition_or_topic": "string", + "phenotype_role": "diagnosis|outcome|screening|severity|procedure|medication_based|risk_score|mixed|unknown", + "care_setting": "outpatient|inpatient|ed|any|unknown", + "population_cue": "string", + "validation_preference": "required|preferred|not_specified", + "executability_preference": "prefer_native_ohdsi|allow_translation|not_specified", + "geography_coding_preference": "us_omop|uk_read|va|not_specified" + }, + "shortlist_ids": [""], + "needs_more_search": "boolean", + "reasoning_notes": ["string <=160 chars"] +} + +### HEURISTICS/RULES + +For `phenotype_recommendation_plan` +- Pick a small shortlist of candidates that are most worth deeper inspection. +- Prefer candidates that match the phenotype role implied by the study intent. +- Do not finalize recommendations yet; this step is only for selecting candidates for evidence hydration. +- If both clinically relevant and executable candidates exist, include at least one executable OHDSI candidate when it plausibly matches the intent. +- Use `needs_more_search=true` only when the current candidates appear systematically mismatched. + +Constraints: +- Choose up to `maxShortlist` ids provided in the request. +- Use ONLY phenotype_ids from the allowed list provided. +- If none are worth deeper review, return an empty `shortlist_ids` array. +- JSON only; no markdown/fences; keep output < 10 KB. 
diff --git a/mcp_server/prompts/phenotype/spec_phenotype_recommendations.md b/mcp_server/prompts/phenotype/spec_phenotype_recommendations.md index f159998..c7d9edd 100644 --- a/mcp_server/prompts/phenotype/spec_phenotype_recommendations.md +++ b/mcp_server/prompts/phenotype/spec_phenotype_recommendations.md @@ -4,8 +4,8 @@ Output contract: "plan": "string <=300 chars", "phenotype_recommendations": [ { - "cohortId": "", - "cohortName": "string", + "phenotype_id": "", + "phenotype_name": "string", "justification": "string <=200 chars", "confidence": "number 0-1 (optional)" } @@ -19,14 +19,14 @@ For `phenotype_recommendations` Constraints: - Choose up to maxResults provided in the request. -- Use ONLY cohortIds from the allowed list provided. +- Use ONLY phenotype_ids from the allowed list provided. - If no matches, return an empty phenotype_recommendations array. - JSON only; no markdown/fences; keep output < 10 KB. Example: { "plan": "Rank phenotypes matching Parkinson’s treatment and outcomes.", "phenotype_recommendations": [ - { "cohortId": 33, "cohortName": "Parkinsons", "justification": "Captures PD diagnosis aligned with study intent.", "confidence": 0.78 }, - { "cohortId": 1197, "cohortName": "PD Meds", "justification": "Medication exposure conceptually linked to outcome comparisons.", "confidence": 0.64 } + { "phenotype_id": "ohdsi:33", "phenotype_name": "Parkinsons", "justification": "Captures PD diagnosis aligned with study intent.", "confidence": 0.78 }, + { "phenotype_id": "cipher:1197", "phenotype_name": "PD Meds", "justification": "Medication exposure conceptually linked to outcome comparisons.", "confidence": 0.64 } ] } diff --git a/mcp_server/scripts/build_phenotype_index.py b/mcp_server/scripts/build_phenotype_index.py index caf8d30..28a7c1b 100644 --- a/mcp_server/scripts/build_phenotype_index.py +++ b/mcp_server/scripts/build_phenotype_index.py @@ -9,11 +9,32 @@ import os import pickle import re +import shutil from typing import Any, Dict, 
Iterable, List, Optional, Tuple -from study_agent_mcp.retrieval.index import EmbeddingClient, _hash_text, _tokenize +from study_agent_mcp.retrieval.index import EmbeddingClient, _hash_text, _load_catalog, _tokenize -_SPLIT_RE = re.compile(r"[;,|\\s]+") +_SPLIT_RE = re.compile(r"[;,|]+") +_METHOD_FAMILY_RULES = { + "phecode": [r"\bphecode\b", r"\bphecodes\b"], + "map": [r"\bmap\b"], + "mvp": [r"\bmvp\b", r"million veteran program"], + "gw": [r"\bgw\b", r"genome[- ]wide", r"gwas"], + "gwphewas": [r"\bgwphewas\b", r"gwphewas", r"phewas"], +} +_STOPWORD_RETRIEVAL_TERMS = { + "a", + "an", + "and", + "for", + "from", + "in", + "of", + "or", + "the", + "to", + "with", +} def _parse_int(value: Any) -> Optional[int]: @@ -23,12 +44,20 @@ def _parse_int(value: Any) -> Optional[int]: return None +def _parse_float(value: Any) -> Optional[float]: + try: + return float(value) + except (TypeError, ValueError): + return None + + def _parse_int_list(value: Any) -> List[int]: if value is None: return [] if isinstance(value, (list, tuple)): - return [_parse_int(v) for v in value if _parse_int(v) is not None] - tokens = re.findall(r"\\d+", str(value)) + parsed = [_parse_int(v) for v in value] + return [v for v in parsed if v is not None] + tokens = re.findall(r"\d+", str(value)) return [int(tok) for tok in tokens] @@ -38,8 +67,120 @@ def _split_tags(value: Any) -> List[str]: if isinstance(value, list): items = value else: - items = _SPLIT_RE.split(str(value)) - return [item.strip("#").strip() for item in items if item.strip()] + text = str(value).replace("#", ",") + items = _SPLIT_RE.split(text) + cleaned: List[str] = [] + for item in items: + text = str(item).strip().strip("#").strip() + if text: + cleaned.append(text) + return list(dict.fromkeys(cleaned)) + + +def _compact_text_parts(parts: Iterable[Any]) -> List[str]: + cleaned: List[str] = [] + for part in parts: + if part is None: + continue + text = str(part).strip() + if text: + cleaned.append(text) + return cleaned + + +def 
_join_text(parts: Iterable[Any]) -> str: + return "\n\n".join(_compact_text_parts(parts)) + + +def _dedupe_texts(values: Iterable[Any]) -> List[str]: + seen = set() + cleaned: List[str] = [] + for value in values: + if value is None: + continue + text = re.sub(r"\s+", " ", str(value).strip()) + if not text: + continue + if re.fullmatch(r"\d+", text): + continue + lowered = text.lower() + if lowered in seen: + continue + seen.add(lowered) + cleaned.append(text) + return cleaned + + +def _derive_retrieval_keywords(values: Iterable[Any], max_terms: int = 32) -> List[str]: + keywords: List[str] = [] + seen = set() + for value in values: + if value is None: + continue + text = re.sub(r"\s+", " ", str(value).strip(" ,;|")) + if not text: + continue + if re.fullmatch(r"\d+", text): + continue + lowered = text.lower() + if lowered in _STOPWORD_RETRIEVAL_TERMS or lowered in seen: + continue + seen.add(lowered) + keywords.append(text) + if len(keywords) >= max_terms: + break + return keywords + + +def _definition_filename(phenotype_id: str) -> str: + safe = phenotype_id.replace(":", "__") + safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", safe) + return f"{safe}.json" + + +_PROMPT_CACHE: Dict[str, Dict[str, Any]] = {} + + +def _prompt_dir() -> str: + return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "prompts", "phenotype")) + + +def _load_text(path: str) -> str: + with open(path, "r", encoding="utf-8") as handle: + return handle.read().strip() + + +def _load_json(path: str) -> Dict[str, Any]: + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + +def _load_keyword_prompt_bundle() -> Dict[str, Any]: + cached = _PROMPT_CACHE.get("phenotype_index_keywords") + if cached is not None: + return cached + base = _prompt_dir() + payload = { + "overview": _load_text(os.path.join(base, "overview_phenotype_index_keywords.md")), + "spec": _load_text(os.path.join(base, "spec_phenotype_index_keywords.md")), + "output_schema": 
_load_json(os.path.join(base, "output_schema_phenotype_index_keywords.json")), + } + _PROMPT_CACHE["phenotype_index_keywords"] = payload + return payload + + +def _load_recommendation_metadata_prompt_bundle() -> Dict[str, Any]: + cached = _PROMPT_CACHE.get("phenotype_index_recommendation_metadata") + if cached is not None: + return cached + base = _prompt_dir() + payload = { + "overview": _load_text(os.path.join(base, "overview_phenotype_index_recommendation_metadata.md")), + "spec": _load_text(os.path.join(base, "spec_phenotype_index_recommendation_metadata.md")), + "output_schema": _load_json(os.path.join(base, "output_schema_phenotype_index_recommendation_metadata.json")), + } + _PROMPT_CACHE["phenotype_index_recommendation_metadata"] = payload + return payload def _load_metadata(csv_path: str) -> List[Dict[str, Any]]: @@ -51,11 +192,9 @@ def _load_metadata(csv_path: str) -> List[Dict[str, Any]]: return rows -def _load_definitions(def_dir: Optional[str]) -> Dict[int, Dict[str, Any]]: +def _load_ohdsi_definitions(def_dir: Optional[str]) -> Dict[int, Dict[str, Any]]: definitions: Dict[int, Dict[str, Any]] = {} - if not def_dir: - return definitions - if not os.path.isdir(def_dir): + if not def_dir or not os.path.isdir(def_dir): return definitions for name in os.listdir(def_dir): if not name.endswith(".json"): @@ -75,20 +214,642 @@ def _load_definitions(def_dir: Optional[str]) -> Dict[int, Dict[str, Any]]: return definitions -def _build_catalog_row(meta: Dict[str, Any], definition: Optional[Dict[str, Any]]) -> Dict[str, Any]: - cohort_id = _parse_int(meta.get("cohortId")) +def _load_cipher_records(cipher_dir: Optional[str]) -> List[Tuple[str, Dict[str, Any]]]: + records: List[Tuple[str, Dict[str, Any]]] = [] + if not cipher_dir or not os.path.isdir(cipher_dir): + return records + for name in sorted(os.listdir(cipher_dir)): + if not name.endswith(".json"): + continue + if name.lower().startswith("enumtype"): + continue + path = os.path.join(cipher_dir, name) + try: 
+ with open(path, "r", encoding="utf-8") as handle: + data = json.load(handle) + except (OSError, json.JSONDecodeError): + continue + records.append((path, data)) + return records + + +def _load_cipher_enum_map(enum_path: Optional[str]) -> Dict[int, Dict[str, Any]]: + if not enum_path or not os.path.exists(enum_path): + return {} + with open(enum_path, "r", encoding="utf-8") as handle: + payload = json.load(handle) + enum_map: Dict[int, Dict[str, Any]] = {} + + def _visit(entry: Dict[str, Any], parent_id: Optional[int] = None) -> None: + enum_id = _parse_int(entry.get("id")) + if enum_id is None: + return None + enum_map[enum_id] = { + "id": enum_id, + "fieldType": entry.get("fieldType"), + "fieldName": entry.get("fieldName"), + "fieldSubType": entry.get("fieldSubType"), + "description": entry.get("description"), + "requireOther": entry.get("requireOther"), + "seqNo": entry.get("seqNo"), + "vaSpecific": bool(entry.get("vaSpecific")), + "parent_id": parent_id, + } + for child in entry.get("subEnums") or []: + if isinstance(child, dict): + _visit(child, enum_id) + + if isinstance(payload, list): + for item in payload: + if isinstance(item, dict): + _visit(item) + return enum_map + + +def _normalize_keywords(values: Iterable[Any]) -> List[str]: + seen = set() + keywords: List[str] = [] + for value in values: + if value is None: + continue + text = str(value).strip() + if not text: + continue + if re.fullmatch(r"\d+", text): + continue + lowered = text.lower() + if lowered in seen: + continue + seen.add(lowered) + keywords.append(text) + return keywords + + +def _method_family_signals(values: Iterable[str]) -> List[str]: + haystack = "\n".join(values).lower() + signals: List[str] = [] + for family, patterns in _METHOD_FAMILY_RULES.items(): + for pattern in patterns: + if re.search(pattern, haystack): + signals.append(f"method_family:{family}") + break + return signals + + +def _extract_methodology_summary(text: str) -> str: + if not text: + return "" + sentences = 
re.split(r"(?<=[.!?])\s+", text.strip()) + if not sentences: + return "" + return sentences[0][:280] + + +def _compose_retrieval_text(row: Dict[str, Any]) -> str: + topic_mentions = row.get("topic_mentions") or {} + target_vs_context = row.get("target_vs_context_conditions") or {} + parts = [ + row.get("name"), + row.get("short_description"), + row.get("long_description"), + " ".join(row.get("tags") or []), + " ".join(row.get("raw_keywords") or []), + " ".join(row.get("retrieval_keywords") or []), + " ".join(row.get("retrieval_concept_labels") or []), + " ".join(row.get("ontology_keys") or []), + " ".join(row.get("signals") or []), + row.get("methodology_summary"), + row.get("adaptation_notes"), + row.get("primary_clinical_topic"), + " ".join(row.get("secondary_topics") or []), + row.get("phenotype_role"), + row.get("care_setting_scope"), + row.get("population_scope"), + " ".join(topic_mentions.get("primary_topics") or []), + " ".join(topic_mentions.get("downstream_or_related_topics") or []), + " ".join(target_vs_context.get("target_conditions") or []), + row.get("recommendation_summary"), + ] + return "\n".join(_compact_text_parts(parts)) + + +def _clean_primary_topic_name(name: str) -> str: + text = re.sub(r"^(?:\[[^\]]+\]\s*)+", "", str(name or "")).strip() + return re.sub(r"\s+", " ", text) + + +def _seed_recommendation_metadata(row: Dict[str, Any]) -> Dict[str, Any]: + primary = _clean_primary_topic_name(row.get("name") or "") + return { + "primary_clinical_topic": primary, + "secondary_topics": [], + "phenotype_role": "unknown", + "care_setting_scope": "unspecified", + "population_scope": "", + "topic_mentions": { + "primary_topics": [primary] if primary else [], + "context_only_topics": [], + "downstream_or_related_topics": [], + }, + "target_vs_context_conditions": { + "target_conditions": [primary] if primary else [], + "context_conditions": [], + }, + "exclude_from_primary_topic_match": [], + "recommendation_summary": row.get("short_description") or 
primary or "", + "recommendation_metadata_source": "heuristic", + } + + +def _normalize_string_list(values: Any, max_items: int = 12) -> List[str]: + if isinstance(values, str): + values = [values] + if not isinstance(values, (list, tuple)): + return [] + cleaned: List[str] = [] + seen = set() + for value in values: + text = re.sub(r"\s+", " ", str(value or "").strip()) + if not text: + continue + lowered = text.lower() + if lowered in seen: + continue + seen.add(lowered) + cleaned.append(text) + if len(cleaned) >= max_items: + break + return cleaned + + +def _normalize_enum_string(value: Any, allowed: set[str], default: str) -> str: + text = str(value or "").strip().lower() + return text if text in allowed else default + + +def _normalize_recommendation_metadata(parsed: Dict[str, Any], row: Dict[str, Any]) -> Dict[str, Any]: + seeded = _seed_recommendation_metadata(row) + topic_mentions = parsed.get("topic_mentions") if isinstance(parsed.get("topic_mentions"), dict) else {} + target_vs_context = parsed.get("target_vs_context_conditions") if isinstance(parsed.get("target_vs_context_conditions"), dict) else {} + primary = re.sub(r"\s+", " ", str(parsed.get("primary_clinical_topic") or seeded["primary_clinical_topic"]).strip()) + if not primary: + primary = seeded["primary_clinical_topic"] + recommendation_summary = re.sub(r"\s+", " ", str(parsed.get("recommendation_summary") or "").strip()) + if not recommendation_summary: + recommendation_summary = seeded["recommendation_summary"] + return { + "primary_clinical_topic": primary, + "secondary_topics": _normalize_string_list(parsed.get("secondary_topics"), max_items=8), + "phenotype_role": _normalize_enum_string( + parsed.get("phenotype_role"), + { + "diagnosis", + "outcome", + "complication", + "severity", + "screening", + "procedure", + "medication_based", + "risk_score", + "comorbidity_covariate", + "mixed", + "unknown", + }, + seeded["phenotype_role"], + ), + "care_setting_scope": _normalize_enum_string( + 
parsed.get("care_setting_scope"), + {"outpatient", "inpatient", "ed", "mixed", "unspecified"}, + seeded["care_setting_scope"], + ), + "population_scope": re.sub(r"\s+", " ", str(parsed.get("population_scope") or "").strip()), + "topic_mentions": { + "primary_topics": _normalize_string_list(topic_mentions.get("primary_topics"), max_items=8), + "context_only_topics": _normalize_string_list(topic_mentions.get("context_only_topics"), max_items=8), + "downstream_or_related_topics": _normalize_string_list(topic_mentions.get("downstream_or_related_topics"), max_items=8), + }, + "target_vs_context_conditions": { + "target_conditions": _normalize_string_list(target_vs_context.get("target_conditions"), max_items=8), + "context_conditions": _normalize_string_list(target_vs_context.get("context_conditions"), max_items=8), + }, + "exclude_from_primary_topic_match": _normalize_string_list(parsed.get("exclude_from_primary_topic_match"), max_items=8), + "recommendation_summary": recommendation_summary, + "recommendation_metadata_source": "llm", + } + + +def _keyword_prompt_payload(row: Dict[str, Any]) -> Dict[str, Any]: + return { + "task": "phenotype_index_keyword_derivation", + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), + "name": row.get("name") or "", + "short_description": row.get("short_description") or "", + "long_description": _truncate_for_prompt(row.get("long_description") or "", 2400), + "tags": row.get("tags") or [], + "raw_keywords": row.get("raw_keywords") or [], + "retrieval_concept_labels": (row.get("retrieval_concept_labels") or [])[:24], + "methodology_summary": row.get("methodology_summary") or "", + "signals": [signal for signal in (row.get("signals") or []) if signal.startswith("method_family:") or signal.startswith("execution:")], + "heuristic_keywords": row.get("retrieval_keywords") or [], + "executable_definition_status": row.get("executable_definition_status") or "", + } + + +def _truncate_for_prompt(text: str, 
limit: int) -> str: + if len(text) <= limit: + return text + return text[:limit] + + +def _keyword_cache_key(payload: Dict[str, Any]) -> str: + phenotype_id = str(payload.get("phenotype_id") or "unknown") + source_hash = _hash_text(json.dumps(payload, sort_keys=True, ensure_ascii=True)) + return f"{phenotype_id}:{source_hash}" + + +def _build_keyword_prompt(payload: Dict[str, Any], max_terms: int) -> str: + bundle = _load_keyword_prompt_bundle() + overview = bundle.get("overview", "") + spec = bundle.get("spec", "") + schema = bundle.get("output_schema", {}) + dynamic = dict(payload) + dynamic["max_terms"] = max_terms + strict_rules = "\n\n".join([ + "STRICT OUTPUT RULES:", + spec, + "Return exactly ONE JSON object that matches the output schema.", + "Do NOT wrap output in markdown, code fences, or prose.", + "If uncertain, return the required key with an empty array.", + ]) + return "\n\n".join([ + overview, + "OUTPUT SCHEMA (JSON):", + json.dumps(schema, ensure_ascii=True), + "DYNAMIC INPUT (JSON):", + json.dumps(dynamic, ensure_ascii=True), + strict_rules, + ]) + + +def _call_keyword_llm(prompt: str) -> Dict[str, Any]: + try: + from study_agent_acp.llm_client import call_llm + except ImportError as exc: + return {"status": "disabled", "error": f"import_error:{exc}"} + result = call_llm(prompt, required_keys=["retrieval_keywords"]) + return { + "status": result.status, + "error": result.error, + "parsed_content": result.parsed_content or {}, + "schema_valid": result.schema_valid, + } + + +def _normalize_llm_keywords(values: Iterable[Any], max_terms: int) -> List[str]: + return _derive_retrieval_keywords(values, max_terms=max_terms) + + +def _apply_llm_retrieval_keywords( + row: Dict[str, Any], + keyword_cache: Dict[str, Dict[str, Any]], + enabled: bool = False, + max_terms: int = 12, +) -> Optional[Dict[str, Any]]: + fallback = _normalize_llm_keywords(row.get("retrieval_keywords") or [], max_terms=max_terms) + row["retrieval_keywords"] = fallback + 
row["retrieval_keywords_source"] = "heuristic" + row["retrieval_text"] = _compose_retrieval_text(row) + if not enabled: + return + + payload = _keyword_prompt_payload(row) + cache_key = _keyword_cache_key(payload) + cached = keyword_cache.get(cache_key) or {} + cached_keywords = _normalize_llm_keywords(cached.get("retrieval_keywords") or [], max_terms=max_terms) + if cached_keywords: + row["retrieval_keywords"] = cached_keywords + row["retrieval_keywords_source"] = "llm_cached" + row["retrieval_text"] = _compose_retrieval_text(row) + return None + + result = _call_keyword_llm(_build_keyword_prompt(payload, max_terms=max_terms)) + if result.get("status") == "ok": + llm_keywords = _normalize_llm_keywords((result.get("parsed_content") or {}).get("retrieval_keywords") or [], max_terms=max_terms) + if llm_keywords: + keyword_cache[cache_key] = { + "phenotype_id": row.get("phenotype_id"), + "retrieval_keywords": llm_keywords, + } + row["retrieval_keywords"] = llm_keywords + row["retrieval_keywords_source"] = "llm" + row["retrieval_text"] = _compose_retrieval_text(row) + return { + "cache_key": cache_key, + "phenotype_id": row.get("phenotype_id"), + "retrieval_keywords": llm_keywords, + } + return None + + +def _recommendation_metadata_prompt_payload(row: Dict[str, Any]) -> Dict[str, Any]: + return { + "task": "phenotype_index_recommendation_metadata", + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), + "name": row.get("name") or "", + "short_description": row.get("short_description") or "", + "long_description": _truncate_for_prompt(row.get("long_description") or "", 2400), + "retrieval_keywords": row.get("retrieval_keywords") or [], + "retrieval_concept_labels": (row.get("retrieval_concept_labels") or [])[:24], + "methodology_summary": row.get("methodology_summary") or "", + "signals": row.get("signals") or [], + "executable_definition_status": row.get("executable_definition_status") or "", + "execution_readiness_score": 
row.get("execution_readiness_score"), + } + + +def _recommendation_metadata_cache_key(payload: Dict[str, Any]) -> str: + phenotype_id = str(payload.get("phenotype_id") or "unknown") + source_hash = _hash_text(json.dumps(payload, sort_keys=True, ensure_ascii=True)) + return f"recommendation:{phenotype_id}:{source_hash}" + + +def _build_recommendation_metadata_prompt(payload: Dict[str, Any]) -> str: + bundle = _load_recommendation_metadata_prompt_bundle() + overview = bundle.get("overview", "") + spec = bundle.get("spec", "") + schema = bundle.get("output_schema", {}) + strict_rules = "\n\n".join([ + "STRICT OUTPUT RULES:", + spec, + "Return exactly ONE JSON object that matches the output schema.", + "Do NOT wrap output in markdown, code fences, or prose.", + "If uncertain, return the required keys with empty strings/arrays and conservative enum defaults.", + ]) + return "\n\n".join([ + overview, + "OUTPUT SCHEMA (JSON):", + json.dumps(schema, ensure_ascii=True), + "DYNAMIC INPUT (JSON):", + json.dumps(payload, ensure_ascii=True), + strict_rules, + ]) + + +def _call_recommendation_metadata_llm(prompt: str) -> Dict[str, Any]: + try: + from study_agent_acp.llm_client import call_llm + except ImportError as exc: + return {"status": "disabled", "error": f"import_error:{exc}"} + result = call_llm( + prompt, + required_keys=[ + "primary_clinical_topic", + "secondary_topics", + "phenotype_role", + "care_setting_scope", + "population_scope", + "topic_mentions", + "target_vs_context_conditions", + "exclude_from_primary_topic_match", + "recommendation_summary", + ], + ) + return { + "status": result.status, + "error": result.error, + "parsed_content": result.parsed_content or {}, + "schema_valid": result.schema_valid, + } + + +def _apply_llm_recommendation_metadata( + row: Dict[str, Any], + recommendation_cache: Dict[str, Dict[str, Any]], + enabled: bool = False, +) -> Optional[Dict[str, Any]]: + seeded = _seed_recommendation_metadata(row) + row.update(seeded) + 
row["retrieval_text"] = _compose_retrieval_text(row) + if not enabled: + return None + + payload = _recommendation_metadata_prompt_payload(row) + cache_key = _recommendation_metadata_cache_key(payload) + cached = recommendation_cache.get(cache_key) or {} + cached_payload = cached.get("recommendation_metadata") if isinstance(cached.get("recommendation_metadata"), dict) else None + if cached_payload: + normalized = _normalize_recommendation_metadata(cached_payload, row) + normalized["recommendation_metadata_source"] = "llm_cached" + row.update(normalized) + row["retrieval_text"] = _compose_retrieval_text(row) + return None + + result = _call_recommendation_metadata_llm(_build_recommendation_metadata_prompt(payload)) + if result.get("status") == "ok": + parsed = result.get("parsed_content") or {} + normalized = _normalize_recommendation_metadata(parsed, row) + cache_entry = { + "cache_key": cache_key, + "phenotype_id": row.get("phenotype_id"), + "recommendation_metadata": normalized, + } + recommendation_cache[cache_key] = cache_entry + row.update(normalized) + row["retrieval_text"] = _compose_retrieval_text(row) + return cache_entry + return None + + +def _load_jsonl_cache(path: str) -> Dict[str, Dict[str, Any]]: + if not os.path.exists(path): + return {} + cache: Dict[str, Dict[str, Any]] = {} + try: + with open(path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + try: + payload = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(payload, dict): + continue + cache_key = payload.get("cache_key") + if isinstance(cache_key, str) and cache_key: + cache[cache_key] = payload + except OSError: + return {} + return cache + + +def _append_jsonl_cache_entry(path: str, entry: Dict[str, Any]) -> None: + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + with open(path, "a", encoding="utf-8") as handle: + handle.write(json.dumps(entry, ensure_ascii=True) + 
"\n") + + +def _split_ohdsi_domains(value: Any) -> List[str]: + if not value: + return [] + return _dedupe_texts(re.split(r"[;,|]+", str(value))) + + +def _extract_ohdsi_concept_evidence(definition: Optional[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], Dict[str, Any], List[str]]: + concept_sets = (definition or {}).get("ConceptSets") + if not isinstance(concept_sets, list): + return ( + [], + { + "coded_terms": [], + "coverage_summary": { + "has_codes": False, + "has_labels": False, + "has_omop_mapping": False, + }, + }, + [], + ) + + grouped: Dict[str, Dict[str, Any]] = {} + retrieval_labels: List[str] = [] + for concept_set in concept_sets: + if not isinstance(concept_set, dict): + continue + concept_set_name = str(concept_set.get("name") or "").strip() + items = ((concept_set.get("expression") or {}).get("items") or []) + if concept_set_name: + retrieval_labels.append(concept_set_name) + for item in items: + if not isinstance(item, dict): + continue + concept = item.get("concept") if isinstance(item.get("concept"), dict) else {} + if not concept: + continue + vocabulary_id = str(concept.get("VOCABULARY_ID") or "Unknown").strip() or "Unknown" + group = grouped.setdefault( + vocabulary_id, + { + "system_id": vocabulary_id, + "system_name": vocabulary_id, + "subsystem_id": None, + "subsystem_name": None, + "codes": [], + "description": "", + "va_specific": False, + "concept_ids": [], + "labels": [], + "embedding_terms": [], + "domains": [], + "concept_set_names": [], + }, + ) + concept_id = _parse_int(concept.get("CONCEPT_ID")) + concept_code = str(concept.get("CONCEPT_CODE") or "").strip() + concept_name = str(concept.get("CONCEPT_NAME") or "").strip() + domain_id = str(concept.get("DOMAIN_ID") or "").strip() + if concept_id is not None: + group["concept_ids"].append(concept_id) + if concept_code: + group["codes"].append(concept_code) + if concept_name: + group["labels"].append(concept_name) + group["embedding_terms"].append(concept_name) + 
retrieval_labels.append(concept_name) + if domain_id: + group["domains"].append(domain_id) + if concept_set_name: + group["concept_set_names"].append(concept_set_name) + + code_systems: List[Dict[str, Any]] = [] + coded_terms: List[Dict[str, Any]] = [] + for vocabulary_id, group in grouped.items(): + codes = _dedupe_texts(group["codes"]) + labels = _dedupe_texts(group["labels"]) + concept_set_names = _dedupe_texts(group["concept_set_names"]) + domains = _dedupe_texts(group["domains"]) + concept_ids = sorted(set(group["concept_ids"])) + embedding_terms = _dedupe_texts(group["embedding_terms"] + concept_set_names + [vocabulary_id] + domains) + code_systems.append( + { + "system_id": group["system_id"], + "system_name": group["system_name"], + "subsystem_id": None, + "subsystem_name": None, + "codes": codes, + "description": ", ".join(concept_set_names[:3]), + "va_specific": False, + "concept_ids": concept_ids, + "concept_names": labels, + "domains": domains, + "concept_set_names": concept_set_names, + } + ) + coded_terms.append( + { + "system": vocabulary_id, + "codes": codes, + "labels": labels, + "omop_candidates": concept_ids, + "embedding_terms": embedding_terms, + "concept_set_names": concept_set_names, + "domains": domains, + } + ) + + concept_evidence = { + "coded_terms": coded_terms, + "coverage_summary": { + "has_codes": any(item.get("codes") for item in coded_terms), + "has_labels": any(item.get("labels") for item in coded_terms), + "has_omop_mapping": any(item.get("omop_candidates") for item in coded_terms), + }, + } + return code_systems, concept_evidence, _dedupe_texts(retrieval_labels + list(grouped.keys())) + + +def _copy_definition(output_dir: str, phenotype_id: str, data: Dict[str, Any]) -> str: + definitions_dir = os.path.join(output_dir, "definitions") + os.makedirs(definitions_dir, exist_ok=True) + filename = _definition_filename(phenotype_id) + path = os.path.join(definitions_dir, filename) + with open(path, "w", encoding="utf-8") as handle: + 
json.dump(data, handle, ensure_ascii=True) + return filename + + +def _copy_source_file(output_dir: str, phenotype_id: str, src_path: str) -> str: + definitions_dir = os.path.join(output_dir, "definitions") + os.makedirs(definitions_dir, exist_ok=True) + filename = _definition_filename(phenotype_id) + dst_path = os.path.join(definitions_dir, filename) + shutil.copyfile(src_path, dst_path) + return filename + + +def _build_ohdsi_row(meta: Dict[str, Any], definition: Optional[Dict[str, Any]]) -> Dict[str, Any]: + native_id = _parse_int(meta.get("cohortId")) + if native_id is None: + raise ValueError("OHDSI row missing cohortId") + phenotype_id = f"ohdsi:{native_id}" name = meta.get("cohortName") or meta.get("cohortNameLong") or meta.get("cohortNameFormatted") or "" short_description = meta.get("logicDescription") or meta.get("notes") or "" + long_description = _join_text([ + meta.get("logicDescription"), + meta.get("notes"), + (definition or {}).get("Description"), + (definition or {}).get("description"), + ]) tags = _split_tags(meta.get("hashTag")) - ontology_keys = _parse_int_list(meta.get("recommendedReferentConceptIds")) - signals = [] - status = meta.get("status") - if status: - signals.append(f"status:{status}") - if meta.get("isReferenceCohort"): - signals.append("reference") - if meta.get("hasWashoutInText"): - signals.append("washout") + raw_keywords: List[str] = [] + ontology_keys = [str(value) for value in _parse_int_list(meta.get("recommendedReferentConceptIds"))] + code_systems, concept_evidence, retrieval_concept_labels = _extract_ohdsi_concept_evidence(definition) + ohdsi_domains = _split_ohdsi_domains(meta.get("domainsInEntryEvents")) logic_features = { "numberOfInclusionRules": _parse_int(meta.get("numberOfInclusionRules")) or 0, "numberOfConceptSets": _parse_int(meta.get("numberOfConceptSets")) or 0, @@ -98,48 +859,381 @@ def _build_catalog_row(meta: Dict[str, Any], definition: Optional[Dict[str, Any] "hasObservationType": 
meta.get("hasObservationType") or "", "hasProcedureType": meta.get("hasProcedureType") or "", } + methodology_summary = ( + f"Native OHDSI cohort with {logic_features['numberOfConceptSets']} concept sets and " + f"{logic_features['numberOfInclusionRules']} inclusion rules." + ) + signals = ["source:ohdsi", "execution:native_ohdsi"] + status = (meta.get("status") or "").strip() + if status: + signals.append(f"status:{status}") + if str(meta.get("isReferenceCohort") or "").strip() not in ("", "0", "FALSE", "False", "false"): + signals.append("reference") + if str(meta.get("hasWashoutInText") or "").strip() not in ("", "0", "FALSE", "False", "false"): + signals.append("washout") - pop_keywords = list(dict.fromkeys(_tokenize(" ".join([name, short_description, " ".join(tags)])))) - if definition: - description = definition.get("description") or definition.get("name") or "" - if description: - pop_keywords.extend(_tokenize(description)) - pop_keywords = list(dict.fromkeys(pop_keywords)) - - source_meta = { - "librarian": meta.get("librarian"), - "status": meta.get("status"), - "addedVersion": meta.get("addedVersion"), - "createdDate": meta.get("createdDate"), - "modifiedDate": meta.get("modifiedDate"), - "lastModifiedBy": meta.get("lastModifiedBy"), + provenance = { + "created_at": meta.get("createdDate") or "", + "modified_at": meta.get("modifiedDate") or "", + "version": meta.get("addedVersion") or "", + "status": status, + "authors": [], + "contacts": [], + "sources": ["OHDSI Phenotype Library"], + "publications": [], + "maintainer": meta.get("librarian") or "", } - - return { - "cohortId": cohort_id, + population_features = { + "logic_features": logic_features, + "demographicCriteria": meta.get("demographicCriteria") or "", + "demographicCriteriaAge": meta.get("demographicCriteriaAge") or "", + "demographicCriteriaGender": meta.get("demographicCriteriaGender") or "", + "restrictedByVisit": meta.get("restrictedByVisit") or "", + } + validation_features = { + 
"validated": None, + "validation_description": "", + "adjudication_performed": None, + "adjudication_method": "", + "adjudication_level_type": "", + } + adaptation_notes = "Native OHDSI cohort likely requires parameter or concept-set adjustment for local study intent." + translation_inputs = { + "source_type": "ohdsi_cohort_definition", + "cohort_id": native_id, + "name": name, + "logic_description": short_description, + "recommended_referent_concept_ids": ontology_keys, + "logic_features": logic_features, + "domains_in_entry_events": meta.get("domainsInEntryEvents") or "", + } + retrieval_keywords = _derive_retrieval_keywords( + tags + + ohdsi_domains + + [meta.get("demographicCriteriaGender"), "native OHDSI cohort"] + + [f"entry domain {domain}" for domain in ohdsi_domains] + + [f"{logic_features['numberOfConceptSets']} concept sets" if logic_features["numberOfConceptSets"] else ""] + + [f"{logic_features['numberOfInclusionRules']} inclusion rules" if logic_features["numberOfInclusionRules"] else ""] + ) + row = { + "phenotype_id": phenotype_id, + "source_dataset": "ohdsi_phenotype_library", + "source_record_type": "cohort_definition", + "source_native_id": native_id, "name": name, "short_description": short_description, + "long_description": long_description, "tags": tags, - "ontology_keys": ontology_keys, + "raw_keywords": raw_keywords, + "retrieval_keywords": retrieval_keywords, + "retrieval_concept_labels": retrieval_concept_labels, + "methodology_summary": methodology_summary, "signals": signals, - "logic_features": logic_features, - "pop_keywords": pop_keywords, - "source_meta": source_meta, + "ontology_keys": ontology_keys, + "code_systems": code_systems, + "concept_evidence": concept_evidence, + "validation_features": validation_features, + "population_features": population_features, + "provenance": provenance, + "executable_definition_status": "native_ohdsi", + "executable_definition_source": "ohdsi_library", + "execution_readiness_score": 1.0, + 
"adaptation_notes": adaptation_notes, + "translation_inputs": translation_inputs, + "retrieval_keywords_source": "heuristic", + "retrieval_text": "", + "source_meta": { + "status": status, + "librarian": meta.get("librarian") or "", + "addedVersion": meta.get("addedVersion") or "", + "createdDate": meta.get("createdDate") or "", + "modifiedDate": meta.get("modifiedDate") or "", + "lastModifiedBy": meta.get("lastModifiedBy") or "", + }, + "source_payload_ref": "", + "definition_ref": "", + } + row["retrieval_text"] = _compose_retrieval_text(row) + return row + + +def _resolve_enum_label(enum_map: Dict[int, Dict[str, Any]], enum_id: Optional[int]) -> Optional[str]: + if enum_id is None: + return None + item = enum_map.get(enum_id) + if not item: + return None + return item.get("fieldName") or None + + +def _extract_cipher_code_systems( + assoc_codes: List[Dict[str, Any]], + enum_map: Dict[int, Dict[str, Any]], +) -> Tuple[List[Dict[str, Any]], Dict[str, Any], List[str]]: + code_systems: List[Dict[str, Any]] = [] + coded_terms: List[Dict[str, Any]] = [] + label_bits: List[str] = [] + has_labels = False + for entry in assoc_codes or []: + code_type = _parse_int(entry.get("codeType")) + sub_code_type = _parse_int(entry.get("subCodeType")) + system_name = _resolve_enum_label(enum_map, code_type) + subsystem_name = _resolve_enum_label(enum_map, sub_code_type) + system_meta = enum_map.get(code_type or -1) or {} + codes = [] + for code in entry.get("codes") or []: + if not isinstance(code, dict): + continue + value = str(code.get("code") or "").strip() + if value: + codes.append(value) + codes = list(dict.fromkeys(codes)) + description = entry.get("description") + if description: + has_labels = True + if system_name: + label_bits.append(system_name) + has_labels = True + if subsystem_name: + label_bits.append(subsystem_name) + has_labels = True + code_systems.append( + { + "system_id": code_type, + "system_name": system_name, + "subsystem_id": sub_code_type, + 
"subsystem_name": subsystem_name, + "codes": codes, + "description": description, + "va_specific": bool(system_meta.get("vaSpecific")), + "labels": [text for text in [system_name, subsystem_name, description] if text], + } + ) + coded_terms.append( + { + "system": system_name or str(code_type) if code_type is not None else "unknown", + "codes": codes, + "labels": [text for text in [system_name, subsystem_name, description] if text], + "omop_candidates": [], + "embedding_terms": [text for text in [system_name, subsystem_name, description] if text], + } + ) + concept_evidence = { + "coded_terms": coded_terms, + "coverage_summary": { + "has_codes": any(item.get("codes") for item in coded_terms), + "has_labels": has_labels, + "has_omop_mapping": False, + }, + } + return code_systems, concept_evidence, _dedupe_texts(label_bits) + + +def _infer_cipher_executable_status(description: str, algorithm_desc: str, code_systems: List[Dict[str, Any]]) -> str: + rich_text = f"{description}\n{algorithm_desc}".lower() + if any(item.get("codes") for item in code_systems): + return "codes_only" + if any(keyword in rich_text for keyword in ["algorithm", "identify", "outpatient", "inpatient", "criteria"]): + return "narrative_only" + return "unknown" + + +def _build_cipher_row(path: str, data: Dict[str, Any], enum_map: Dict[int, Dict[str, Any]]) -> Dict[str, Any]: + native_id = _parse_int(data.get("id")) + if native_id is None: + raise ValueError(f"CIPHER record missing id: {path}") + phenotype_id = f"cipher:{native_id}" + algorithm = data.get("algorithm") if isinstance(data.get("algorithm"), dict) else {} + description = data.get("description") or "" + algorithm_desc = algorithm.get("algorithmDesc") or "" + population_desc = algorithm.get("populationDesc") or "" + validation_desc = algorithm.get("validationDescription") or "" + publication_ack = algorithm.get("publicationAcknowledgement") or "" + short_description = description or algorithm_desc + long_description = 
_join_text([description, algorithm_desc, population_desc, validation_desc, publication_ack]) + + tags = _dedupe_texts([data.get("phenotypeCategory")]) + raw_keywords = _dedupe_texts( + [item.get("keyword") for item in data.get("keywords") or [] if isinstance(item, dict)] + ) + code_systems, concept_evidence, retrieval_concept_labels = _extract_cipher_code_systems( + algorithm.get("assocCodes") or [], + enum_map, + ) + methodology_summary = _extract_methodology_summary(description or algorithm_desc) + methodology_signals = _method_family_signals(tags + raw_keywords + [data.get("fullName") or "", description, algorithm_desc]) + retrieval_keywords = _derive_retrieval_keywords( + tags + + raw_keywords + + retrieval_concept_labels + + [ + item.get("otherSource") + for item in data.get("sources") or [] + if isinstance(item, dict) + ] + + [signal.split(":", 1)[1].upper() for signal in methodology_signals] + ) + signals = ["source:cipher"] + status_id = data.get("phenotypeStatusId") + if status_id is not None: + signals.append(f"status:{status_id}") + if data.get("vaDeveloped"): + signals.append("va_developed") + if data.get("majorRevision"): + signals.append("major_revision") + validated = algorithm.get("validated") + if validated is True: + signals.append("validated") + elif validated is False: + signals.append("not_validated") + if data.get("publications"): + signals.append("has_publication") + if data.get("toolLinks"): + signals.append("has_tool_link") + if algorithm.get("contacts"): + signals.append("has_contact") + for code_system in code_systems: + system_name = (code_system.get("system_name") or "").lower() + if "icd-9" in system_name: + signals.append("has_code_system:icd9") + elif "icd-10" in system_name: + signals.append("has_code_system:icd10") + elif "snomed" in system_name: + signals.append("has_code_system:snomed") + elif "medication" in system_name: + signals.append("has_code_system:medication") + signals.extend(methodology_signals) + + executable_status = 
_infer_cipher_executable_status(description, algorithm_desc, code_systems) + signals.append(f"execution:{executable_status}") + readiness_score = { + "codes_only": 0.45, + "narrative_only": 0.25, + "non_ohdsi_logic_only": 0.6, + "unknown": 0.15, + }.get(executable_status, 0.15) + + provenance = { + "created_at": data.get("created") or "", + "modified_at": data.get("lastModified") or "", + "version": data.get("versionInfo") or "", + "status": data.get("phenotypeStatusId"), + "authors": [ + author.get("author", {}).get("name") + for author in algorithm.get("authors") or [] + if isinstance(author, dict) and isinstance(author.get("author"), dict) and author.get("author", {}).get("name") + ], + "contacts": [ + email.get("email") + for contact in algorithm.get("contacts") or [] + if isinstance(contact, dict) + for email in contact.get("emails") or [] + if isinstance(email, dict) and email.get("email") + ], + "sources": [ + item.get("otherSource") + for item in data.get("sources") or [] + if isinstance(item, dict) and item.get("otherSource") + ], + "publications": [ + {"title": pub.get("title"), "link": pub.get("link")} + for pub in data.get("publications") or [] + if isinstance(pub, dict) + ], + "maintainer": "", + } + population_features = { + "population_description": population_desc, + "context_ids": [item.get("contextId") for item in algorithm.get("contextDevs") or [] if isinstance(item, dict)], + "data_used_start": algorithm.get("dataUsedStart"), + "data_used_end": algorithm.get("dataUsedEnd"), + } + validation_features = { + "validated": validated, + "validation_description": validation_desc, + "adjudication_performed": algorithm.get("adjudicationPerformed"), + "adjudication_method": algorithm.get("adjudicationMethod") or "", + "adjudication_level_type": algorithm.get("adjudicationLevelType"), + "validation_count": len(algorithm.get("validations") or []), + } + methodology_context = { + "family_tags": [signal.split(":", 1)[1].upper() for signal in 
methodology_signals], + "summary": methodology_summary, + "translation_cautions": [ + "May require OMOP concept-set expansion rather than direct code copy.", + "May represent an empirically derived grouping rather than a directly executable cohort algorithm.", + ] if methodology_signals else [], + } + adaptation_notes = ( + "CIPHER phenotype provides code evidence and narrative but requires translation into OHDSI cohort entry, exit, and era logic." + ) + if methodology_signals: + adaptation_notes = ( + "Phenotype appears derived from PheCode/MAP-style methodology and may need concept expansion and validation against available OMOP domains." + ) + translation_inputs = { + "source_type": "cipher_disease_phenotype", + "phenotype_id": native_id, + "name": data.get("fullName") or "", + "disease_summary": description, + "algorithm_narrative": algorithm_desc, + "population_description": population_desc, + "validation_description": validation_desc, + "code_systems": code_systems, + "source_family_labels": tags, + "publication_links": provenance["publications"], + "tool_link_ids": [item.get("visualToolId") for item in data.get("toolLinks") or [] if isinstance(item, dict)], + "source_provenance": provenance, + "methodology_context": methodology_context, + } + row = { + "phenotype_id": phenotype_id, + "source_dataset": "va_cipher", + "source_record_type": "disease_phenotype", + "source_native_id": native_id, + "name": data.get("fullName") or "", + "short_description": short_description, + "long_description": long_description, + "tags": tags, + "raw_keywords": raw_keywords, + "retrieval_keywords": retrieval_keywords, + "retrieval_concept_labels": retrieval_concept_labels, + "methodology_summary": methodology_summary, + "signals": list(dict.fromkeys(signals)), + "ontology_keys": [str(item.get("relatedDiseaseId")) for item in algorithm.get("relatedDiseases") or [] if isinstance(item, dict) and item.get("relatedDiseaseId") is not None], + "code_systems": code_systems, + 
"concept_evidence": concept_evidence, + "validation_features": validation_features, + "population_features": population_features, + "provenance": provenance, + "executable_definition_status": executable_status, + "executable_definition_source": "cipher_json", + "execution_readiness_score": readiness_score, + "adaptation_notes": adaptation_notes, + "translation_inputs": translation_inputs, + "retrieval_keywords_source": "heuristic", + "retrieval_text": "", + "source_meta": { + "uqid": data.get("uqid"), + "phenotypeStatusId": data.get("phenotypeStatusId"), + "categoryTypeId": data.get("categoryTypeId"), + "dbType": data.get("dbType"), + "vaDeveloped": data.get("vaDeveloped"), + "revision": data.get("revision"), + "majorRevision": data.get("majorRevision"), + }, + "source_payload_ref": path, + "definition_ref": "", } + row["retrieval_text"] = _compose_retrieval_text(row) + return row def _build_sparse_index(catalog: List[Dict[str, Any]], k1: float = 1.5, b: float = 0.75) -> Dict[str, Any]: postings: Dict[str, List[Tuple[int, int]]] = {} doc_lengths: List[int] = [] for idx, row in enumerate(catalog): - text = " ".join( - [ - row.get("name") or "", - row.get("short_description") or "", - " ".join(row.get("tags") or []), - " ".join(row.get("pop_keywords") or []), - ] - ) + text = row.get("retrieval_text") or row.get("name") or "" terms = _tokenize(text) doc_lengths.append(len(terms)) tf: Dict[str, int] = {} @@ -185,6 +1279,13 @@ def _save_cache(path: str, cache: Dict[str, List[float]]) -> None: pickle.dump(cache, handle) +def _load_existing_meta(path: str) -> Dict[str, Any]: + if not os.path.exists(path): + return {} + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + def _build_dense_index( catalog: List[Dict[str, Any]], output_path: str, @@ -204,20 +1305,11 @@ def _build_dense_index( cache = _load_cache(cache_path) texts: List[str] = [] for row in catalog: - text = " ".join( - [ - row.get("name") or "", - row.get("short_description") or 
"", - " ".join(row.get("pop_keywords") or []), - ] - ).strip() - if not text: - text = row.get("name") or f"cohort {row.get('cohortId')}" + text = (row.get("retrieval_text") or row.get("name") or f"phenotype {row.get('phenotype_id')}").strip() text_hash = _hash_text(text) row["text_for_embedding_hash"] = text_hash row["text_for_embedding"] = text - cached = cache.get(text_hash) - if cached is None: + if cache.get(text_hash) is None: texts.append(text) if texts: @@ -229,13 +1321,12 @@ def _build_dense_index( for text, vec in zip(batch, vectors): cache[_hash_text(text)] = vec - # Rebuild embeddings list in catalog order embeddings = [] for row in catalog: text_hash = row.get("text_for_embedding_hash") vector = cache.get(text_hash) if vector is None: - raise RuntimeError(f"Missing embedding for cohortId {row.get('cohortId')}") + raise RuntimeError(f"Missing embedding for phenotype_id {row.get('phenotype_id')}") embeddings.append(vector) vectors = np.array(embeddings, dtype="float32") @@ -252,33 +1343,132 @@ def _build_dense_index( def main() -> int: parser = argparse.ArgumentParser(description="Build phenotype retrieval index.") - parser.add_argument("--metadata-csv", required=True, help="Path to metadata CSV.") - parser.add_argument("--definitions-dir", help="Path to cohort JSON definitions.") + parser.add_argument("--metadata-csv", help="Path to OHDSI metadata CSV.") + parser.add_argument("--definitions-dir", help="Path to OHDSI cohort JSON definitions.") + parser.add_argument("--cipher-dir", help="Path to CIPHER phenotype JSON definitions.") + parser.add_argument("--cipher-enum", help="Path to CIPHER enum JSON for code-system labels.") parser.add_argument("--output-dir", required=True, help="Index output directory.") + parser.add_argument("--derive-keywords-llm", action="store_true", help="Use chat completion to derive retrieval keywords with caching.") + parser.add_argument("--keyword-cache-path", help="Path to retrieval keyword cache JSONL. 
Defaults to /keyword_cache.jsonl.") + parser.add_argument("--keyword-max-terms", type=int, default=12, help="Maximum derived retrieval keywords per phenotype.") + parser.add_argument("--derive-recommendation-metadata-llm", action="store_true", help="Use chat completion to derive recommendation-oriented phenotype metadata with caching.") + parser.add_argument("--recommendation-metadata-cache-path", help="Path to recommendation metadata cache JSONL. Defaults to /recommendation_metadata_cache.jsonl.") parser.add_argument("--build-dense", action="store_true", help="Build dense FAISS index.") + parser.add_argument("--dense-only", action="store_true", help="Reuse existing catalog.jsonl in --output-dir and build only dense.index plus embedding cache/meta updates.") parser.add_argument("--require-dense", action="store_true", help="Fail if dense index cannot be built.") parser.add_argument("--batch-size", type=int, default=64, help="Embedding batch size.") args = parser.parse_args() - metadata_rows = _load_metadata(args.metadata_csv) - definitions = _load_definitions(args.definitions_dir) + if args.dense_only and not args.build_dense: + raise SystemExit("--dense-only requires --build-dense") + if args.dense_only and (args.metadata_csv or args.cipher_dir): + raise SystemExit("--dense-only cannot be combined with --metadata-csv or --cipher-dir") + if not args.dense_only and not args.metadata_csv and not args.cipher_dir: + raise SystemExit("At least one input source is required: --metadata-csv or --cipher-dir") + + _ensure_dir(args.output_dir) + catalog_path = os.path.join(args.output_dir, "catalog.jsonl") + meta_path = os.path.join(args.output_dir, "meta.json") + + if args.dense_only: + catalog = _load_catalog(catalog_path) + if not catalog: + raise SystemExit(f"No existing catalog found at {catalog_path}; cannot run --dense-only") + existing_meta = _load_existing_meta(meta_path) + dense_info = {"status": "skipped"} + embed_url = os.getenv("EMBED_URL", 
"http://localhost:3000/ollama/api/embed") + embed_model = os.getenv("EMBED_MODEL", "qwen3-embedding:4b") + api_key = os.getenv("EMBED_API_KEY") + client = EmbeddingClient(url=embed_url, model=embed_model, api_key=api_key) + dense_info = _build_dense_index( + catalog=catalog, + output_path=os.path.join(args.output_dir, "dense.index"), + embed_client=client, + cache_path=os.path.join(args.output_dir, "embedding_cache.pkl"), + batch_size=args.batch_size, + require_dense=args.require_dense, + ) + _write_catalog(catalog_path, catalog) + meta = dict(existing_meta) + meta["built_at"] = dt.datetime.now(dt.UTC).isoformat().replace("+00:00", "Z") + meta["catalog_count"] = len(catalog) + meta["dense"] = dense_info + meta["embedding_model"] = os.getenv("EMBED_MODEL", "qwen3-embedding:4b") + meta["embedding_url"] = os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed") + with open(meta_path, "w", encoding="utf-8") as handle: + json.dump(meta, handle, ensure_ascii=True, indent=2) + return 0 + + definitions = _load_ohdsi_definitions(args.definitions_dir) + enum_map = _load_cipher_enum_map(args.cipher_enum) + keyword_cache_path = args.keyword_cache_path or os.path.join(args.output_dir, "keyword_cache.jsonl") + keyword_cache = _load_jsonl_cache(keyword_cache_path) + recommendation_metadata_cache_path = args.recommendation_metadata_cache_path or os.path.join(args.output_dir, "recommendation_metadata_cache.jsonl") + recommendation_metadata_cache = _load_jsonl_cache(recommendation_metadata_cache_path) catalog: List[Dict[str, Any]] = [] - for row in metadata_rows: - cohort_id = _parse_int(row.get("cohortId")) - definition = definitions.get(cohort_id) if cohort_id is not None else None - catalog.append(_build_catalog_row(row, definition)) + source_counts: Dict[str, int] = {} + keyword_source_counts: Dict[str, int] = {} + recommendation_metadata_source_counts: Dict[str, int] = {} - _ensure_dir(args.output_dir) - definitions_out = os.path.join(args.output_dir, "definitions") - 
if args.definitions_dir: - _ensure_dir(definitions_out) - for cohort_id, data in definitions.items(): - path = os.path.join(definitions_out, f"{cohort_id}.json") - with open(path, "w", encoding="utf-8") as handle: - json.dump(data, handle, ensure_ascii=True) + if args.metadata_csv: + metadata_rows = _load_metadata(args.metadata_csv) + for row in metadata_rows: + cohort_id = _parse_int(row.get("cohortId")) + definition = definitions.get(cohort_id) if cohort_id is not None else None + built = _build_ohdsi_row(row, definition) + if definition is not None: + built["definition_ref"] = _copy_definition(args.output_dir, built["phenotype_id"], definition) + new_cache_entry = _apply_llm_retrieval_keywords( + built, + keyword_cache=keyword_cache, + enabled=args.derive_keywords_llm, + max_terms=args.keyword_max_terms, + ) + if new_cache_entry is not None: + _append_jsonl_cache_entry(keyword_cache_path, new_cache_entry) + new_rec_entry = _apply_llm_recommendation_metadata( + built, + recommendation_cache=recommendation_metadata_cache, + enabled=args.derive_recommendation_metadata_llm, + ) + if new_rec_entry is not None: + _append_jsonl_cache_entry(recommendation_metadata_cache_path, new_rec_entry) + source_counts[built["source_dataset"]] = source_counts.get(built["source_dataset"], 0) + 1 + keyword_source = built.get("retrieval_keywords_source") or "heuristic" + keyword_source_counts[keyword_source] = keyword_source_counts.get(keyword_source, 0) + 1 + rec_source = built.get("recommendation_metadata_source") or "heuristic" + recommendation_metadata_source_counts[rec_source] = recommendation_metadata_source_counts.get(rec_source, 0) + 1 + catalog.append(built) + + if args.cipher_dir: + for path, record in _load_cipher_records(args.cipher_dir): + built = _build_cipher_row(path, record, enum_map) + built["definition_ref"] = _copy_source_file(args.output_dir, built["phenotype_id"], path) + new_cache_entry = _apply_llm_retrieval_keywords( + built, + keyword_cache=keyword_cache, + 
enabled=args.derive_keywords_llm, + max_terms=args.keyword_max_terms, + ) + if new_cache_entry is not None: + _append_jsonl_cache_entry(keyword_cache_path, new_cache_entry) + new_rec_entry = _apply_llm_recommendation_metadata( + built, + recommendation_cache=recommendation_metadata_cache, + enabled=args.derive_recommendation_metadata_llm, + ) + if new_rec_entry is not None: + _append_jsonl_cache_entry(recommendation_metadata_cache_path, new_rec_entry) + source_counts[built["source_dataset"]] = source_counts.get(built["source_dataset"], 0) + 1 + keyword_source = built.get("retrieval_keywords_source") or "heuristic" + keyword_source_counts[keyword_source] = keyword_source_counts.get(keyword_source, 0) + 1 + rec_source = built.get("recommendation_metadata_source") or "heuristic" + recommendation_metadata_source_counts[rec_source] = recommendation_metadata_source_counts.get(rec_source, 0) + 1 + catalog.append(built) + + catalog.sort(key=lambda row: (row.get("source_dataset") or "", row.get("name") or "", row.get("phenotype_id") or "")) - catalog_path = os.path.join(args.output_dir, "catalog.jsonl") _write_catalog(catalog_path, catalog) sparse_index = _build_sparse_index(catalog) @@ -299,16 +1489,31 @@ def main() -> int: batch_size=args.batch_size, require_dense=args.require_dense, ) + _write_catalog(catalog_path, catalog) meta = { - "built_at": dt.datetime.utcnow().isoformat() + "Z", + "built_at": dt.datetime.now(dt.UTC).isoformat().replace("+00:00", "Z"), "catalog_count": len(catalog), + "source_counts": source_counts, "dense": dense_info, "sparse": { "doc_count": len(catalog), "k1": sparse_index["k1"], "b": sparse_index["b"], }, + "keyword_derivation": { + "llm_enabled": bool(args.derive_keywords_llm), + "cache_path": keyword_cache_path, + "max_terms": args.keyword_max_terms, + "source_counts": keyword_source_counts, + "cache_entries": len(keyword_cache), + }, + "recommendation_metadata_derivation": { + "llm_enabled": bool(args.derive_recommendation_metadata_llm), + 
"cache_path": recommendation_metadata_cache_path, + "source_counts": recommendation_metadata_source_counts, + "cache_entries": len(recommendation_metadata_cache), + }, "embedding_model": os.getenv("EMBED_MODEL", "qwen3-embedding:4b"), "embedding_url": os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed"), } diff --git a/mcp_server/study_agent_mcp/retrieval/index.py b/mcp_server/study_agent_mcp/retrieval/index.py index bc17a4d..1d7a075 100644 --- a/mcp_server/study_agent_mcp/retrieval/index.py +++ b/mcp_server/study_agent_mcp/retrieval/index.py @@ -44,6 +44,18 @@ def _hash_text(text: str) -> str: return hashlib.sha256(text.encode("utf-8")).hexdigest() +def _normalize_score_map(scores: Dict[int, float]) -> Dict[int, float]: + if not scores: + return {} + values = [float(score) for score in scores.values()] + min_score = min(values) + max_score = max(values) + if math.isclose(max_score, min_score): + return {doc_id: 1.0 for doc_id in scores} + scale = max_score - min_score + return {doc_id: (float(score) - min_score) / scale for doc_id, score in scores.items()} + + def _load_catalog(path: str) -> List[Dict[str, Any]]: catalog: List[Dict[str, Any]] = [] if not os.path.exists(path): @@ -57,6 +69,12 @@ def _load_catalog(path: str) -> List[Dict[str, Any]]: return catalog +def _definition_filename(phenotype_id: str) -> str: + safe = phenotype_id.replace(":", "__") + safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", safe) + return f"{safe}.json" + + @dataclass class EmbeddingClient: url: str @@ -124,7 +142,7 @@ def __init__( self.allow_sparse = allow_sparse self._catalog: List[Dict[str, Any]] = [] - self._catalog_by_id: Dict[int, Dict[str, Any]] = {} + self._catalog_by_id: Dict[str, Dict[str, Any]] = {} self._sparse: Optional[Dict[str, Any]] = None self._dense: Optional[Any] = None self._meta: Dict[str, Any] = {} @@ -142,9 +160,9 @@ def load(self) -> "PhenotypeIndex": self._catalog = _load_catalog(paths["catalog"]) self._catalog_by_id = {} for row in self._catalog: - cid = 
row.get("cohortId") - if isinstance(cid, int): - self._catalog_by_id[cid] = row + phenotype_id = row.get("phenotype_id") + if isinstance(phenotype_id, str) and phenotype_id: + self._catalog_by_id[phenotype_id] = row if os.path.exists(paths["meta"]): with open(paths["meta"], "r", encoding="utf-8") as handle: self._meta = json.load(handle) @@ -160,20 +178,52 @@ def load(self) -> "PhenotypeIndex": self._dense = faiss.read_index(paths["dense"]) return self - def fetch_summary(self, cohort_id: int) -> Optional[Dict[str, Any]]: - row = self._catalog_by_id.get(cohort_id) + def fetch_summary(self, phenotype_id: str) -> Optional[Dict[str, Any]]: + row = self._catalog_by_id.get(phenotype_id) if not row: return None return { - "cohortId": row.get("cohortId"), + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), + "source_record_type": row.get("source_record_type"), "name": row.get("name"), "short_description": row.get("short_description"), "tags": row.get("tags") or [], + "raw_keywords": row.get("raw_keywords") or [], + "retrieval_keywords": row.get("retrieval_keywords") or [], + "retrieval_keywords_source": row.get("retrieval_keywords_source") or "heuristic", + "retrieval_concept_labels": row.get("retrieval_concept_labels") or [], + "methodology_summary": row.get("methodology_summary") or "", + "primary_clinical_topic": row.get("primary_clinical_topic") or "", + "secondary_topics": row.get("secondary_topics") or [], + "phenotype_role": row.get("phenotype_role") or "unknown", + "care_setting_scope": row.get("care_setting_scope") or "unspecified", + "population_scope": row.get("population_scope") or "", + "topic_mentions": row.get("topic_mentions") or {}, + "target_vs_context_conditions": row.get("target_vs_context_conditions") or {}, + "exclude_from_primary_topic_match": row.get("exclude_from_primary_topic_match") or [], + "recommendation_summary": row.get("recommendation_summary") or "", + "recommendation_metadata_source": 
row.get("recommendation_metadata_source") or "heuristic", "signals": row.get("signals") or [], "ontology_keys": row.get("ontology_keys") or [], - "logic_features": row.get("logic_features") or {}, + "code_systems": row.get("code_systems") or [], + "executable_definition_status": row.get("executable_definition_status"), + "execution_readiness_score": row.get("execution_readiness_score"), + "adaptation_notes": row.get("adaptation_notes") or "", } + def fetch_definition(self, phenotype_id: str) -> Optional[Dict[str, Any]]: + row = self._catalog_by_id.get(phenotype_id) + if not row: + return None + definitions_dir = os.path.join(self.index_dir, "definitions") + ref = row.get("definition_ref") or _definition_filename(phenotype_id) + path = os.path.join(definitions_dir, ref) + if not os.path.exists(path): + return None + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + def search( self, query: str, @@ -181,18 +231,21 @@ def search( offset: int = 0, dense_k: int = 100, sparse_k: int = 100, - dense_weight: float = 0.9, - sparse_weight: float = 0.1, + dense_weight: float = 0.6, + sparse_weight: float = 0.4, ) -> List[Dict[str, Any]]: if not query: return [] - dense_scores: Dict[int, float] = {} - sparse_scores: Dict[int, float] = {} + dense_scores_raw: Dict[int, float] = {} + sparse_scores_raw: Dict[int, float] = {} if self._dense is not None and self.embedding_client is not None: - dense_scores = self._dense_search(query, dense_k) + dense_scores_raw = self._dense_search(query, dense_k) if self._sparse is not None: - sparse_scores = self._sparse_search(query, sparse_k) + sparse_scores_raw = self._sparse_search(query, sparse_k) + + dense_scores = _normalize_score_map(dense_scores_raw) + sparse_scores = _normalize_score_map(sparse_scores_raw) merged: Dict[int, float] = {} for doc_id, score in dense_scores.items(): @@ -210,28 +263,29 @@ def search( row = self._catalog[doc_id] results.append( { - "cohortId": row.get("cohortId"), + "phenotype_id": 
row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), "name": row.get("name"), "short_description": row.get("short_description"), "tags": row.get("tags") or [], "signals": row.get("signals") or [], + "executable_definition_status": row.get("executable_definition_status"), + "execution_readiness_score": row.get("execution_readiness_score"), "score": score, "score_dense": dense_scores.get(doc_id), "score_sparse": sparse_scores.get(doc_id), + "score_dense_raw": dense_scores_raw.get(doc_id), + "score_sparse_raw": sparse_scores_raw.get(doc_id), } ) return results - def list_similar(self, cohort_id: int, top_k: int = 10) -> List[Dict[str, Any]]: + def list_similar(self, phenotype_id: str, top_k: int = 10) -> List[Dict[str, Any]]: if self._dense is None: return [] - doc_id = self._find_doc_id(cohort_id) + doc_id = self._find_doc_id(phenotype_id) if doc_id is None: return [] - try: - import faiss # type: ignore - except ImportError: - return [] try: vector = self._dense.reconstruct(doc_id) except Exception: @@ -249,7 +303,8 @@ def list_similar(self, cohort_id: int, top_k: int = 10) -> List[Dict[str, Any]]: row = self._catalog[idx] results.append( { - "cohortId": row.get("cohortId"), + "phenotype_id": row.get("phenotype_id"), + "source_dataset": row.get("source_dataset"), "name": row.get("name"), "short_description": row.get("short_description"), "score": float(score), @@ -259,9 +314,9 @@ def list_similar(self, cohort_id: int, top_k: int = 10) -> List[Dict[str, Any]]: break return results - def _find_doc_id(self, cohort_id: int) -> Optional[int]: + def _find_doc_id(self, phenotype_id: str) -> Optional[int]: for idx, row in enumerate(self._catalog): - if row.get("cohortId") == cohort_id: + if row.get("phenotype_id") == phenotype_id: return idx return None @@ -358,9 +413,7 @@ def get_default_index() -> PhenotypeIndex: catalog_info = status["files"].get("catalog") or {} if not catalog_info.get("exists"): raise RuntimeError(f"Phenotype catalog not found: 
{catalog_info.get('path')}") - embed_url = rewrite_container_host_url( - os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed") - ) + embed_url = rewrite_container_host_url(os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed")) embed_model = os.getenv("EMBED_MODEL", "qwen3-embedding:4b") api_key = os.getenv("EMBED_API_KEY") embedding_client = EmbeddingClient(url=embed_url, model=embed_model, api_key=api_key) diff --git a/mcp_server/study_agent_mcp/tools/__init__.py b/mcp_server/study_agent_mcp/tools/__init__.py index e434786..926feba 100644 --- a/mcp_server/study_agent_mcp/tools/__init__.py +++ b/mcp_server/study_agent_mcp/tools/__init__.py @@ -8,6 +8,7 @@ TOOL_MODULES: list[str] = [ "study_agent_mcp.tools.concept_set_diff", "study_agent_mcp.tools.cohort_lint", + "study_agent_mcp.tools.cohort_methods_intent_split", "study_agent_mcp.tools.phenotype_recommendations", "study_agent_mcp.tools.phenotype_improvements", "study_agent_mcp.tools.phenotype_intent_split", @@ -24,6 +25,7 @@ "study_agent_mcp.tools.case_causal_review", "study_agent_mcp.tools.keeper_concept_sets", "study_agent_mcp.tools.keeper_profiles", + "study_agent_mcp.tools.cohort_methods_prompt_bundle", ] diff --git a/mcp_server/study_agent_mcp/tools/_review_row.py b/mcp_server/study_agent_mcp/tools/_review_row.py index e19ea01..c0140e9 100644 --- a/mcp_server/study_agent_mcp/tools/_review_row.py +++ b/mcp_server/study_agent_mcp/tools/_review_row.py @@ -4,6 +4,8 @@ import re from typing import Any, Dict, Iterable, List +from ._service_registry import get_controlled_identifier_keys + _EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}") _URL_RE = re.compile(r"https?://\S+") _IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b") @@ -51,6 +53,22 @@ "death_date", } +_DEFAULT_CONTROLLED_IDENTIFIER_KEYS = { + "ingred_rxcui", + "rxcui", + "adverse_event_meddra_id", + "meddra_code", + "meddra_id", + "concept_id", + "ingredient_concept_id", + "adverse_event_concept_id", + 
"outcome_concept_id", +} + + +def _configured_controlled_identifier_keys() -> frozenset[str]: + return get_controlled_identifier_keys("case_causal_review", _DEFAULT_CONTROLLED_IDENTIFIER_KEYS) + _ALLOWED_SUBROLES = { "primary_suspect", "secondary_suspect", @@ -59,6 +77,9 @@ "vulnerability_factor", "contextual_factor", "proximate_marker", + "candidate_exposure", + "background_exposure", + "candidate_condition", "index_event", } @@ -81,24 +102,27 @@ def bucket_age(age: Any) -> str: return f"{bucket}-{bucket+4}" -def sanitize_text(text: str) -> str: +def sanitize_text(text: str, key_name: str | None = None) -> str: if not text: return "None" value = str(text).strip() if not value: return "None" + value = _DAY_RE.sub("(prior)", value) + key_norm = str(key_name or "").strip().lower() + if key_norm in _configured_controlled_identifier_keys(): + return value value = _EMAIL_RE.sub("[REDACTED_EMAIL]", value) value = _URL_RE.sub("[REDACTED_URL]", value) value = _IP_RE.sub("[REDACTED_IP]", value) value = _PHONE_RE.sub("[REDACTED_PHONE]", value) value = _DATE_RE.sub("[REDACTED_DATE]", value) value = _ZIP_RE.sub("[REDACTED_ZIP]", value) - value = _DAY_RE.sub("(prior)", value) return value -def clean_optional_text(value: Any) -> str: - text = sanitize_text(str(value or "")) +def clean_optional_text(value: Any, key_name: str | None = None) -> str: + text = sanitize_text(str(value or ""), key_name=key_name) return "" if text == "None" else text @@ -161,27 +185,28 @@ def normalize_subrole(value: Any, default: str) -> str: return default -def sanitize_scalar(value: Any) -> Any: +def sanitize_scalar(value: Any, key_name: str | None = None) -> Any: if value is None: return None if isinstance(value, bool): return value if isinstance(value, (int, float)): return value - return sanitize_text(str(value)) + return sanitize_text(str(value), key_name=key_name) -def sanitize_nested(value: Any, depth: int = 0) -> Any: +def sanitize_nested(value: Any, depth: int = 0, key_name: str | None = 
None) -> Any: if depth > 4: - return sanitize_text(json.dumps(value, ensure_ascii=True, sort_keys=True)) + return sanitize_text(json.dumps(value, ensure_ascii=True, sort_keys=True), key_name=key_name) if isinstance(value, dict): sanitized: Dict[str, Any] = {} for key, inner in value.items(): - sanitized[sanitize_text(str(key))] = sanitize_nested(inner, depth + 1) + raw_key = str(key) + sanitized[sanitize_text(raw_key)] = sanitize_nested(inner, depth + 1, key_name=raw_key) return sanitized if isinstance(value, list): - return [sanitize_nested(item, depth + 1) for item in value[:50]] - return sanitize_scalar(value) + return [sanitize_nested(item, depth + 1, key_name=key_name) for item in value[:50]] + return sanitize_scalar(value, key_name=key_name) def collect_phi_issues(value: Any, path: str = "case_row") -> List[str]: diff --git a/mcp_server/study_agent_mcp/tools/_service_registry.py b/mcp_server/study_agent_mcp/tools/_service_registry.py new file mode 100644 index 0000000..e1d13b2 --- /dev/null +++ b/mcp_server/study_agent_mcp/tools/_service_registry.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import functools +import os +from typing import Any, Iterable + +import yaml + + +@functools.lru_cache(maxsize=1) +def load_service_registry() -> dict[str, Any]: + registry_path = os.getenv("STUDY_AGENT_SERVICE_REGISTRY", "docs/SERVICE_REGISTRY.yaml") + with open(registry_path, "r", encoding="utf-8") as handle: + data = yaml.safe_load(handle) or {} + return data if isinstance(data, dict) else {} + + +def get_service_definition(service_name: str) -> dict[str, Any]: + data = load_service_registry() + services = data.get("services") or {} + if not isinstance(services, dict): + return {} + service = services.get(service_name) or {} + return service if isinstance(service, dict) else {} + + +def get_service_validation(service_name: str) -> dict[str, Any]: + service = get_service_definition(service_name) + validation = service.get("validation") or {} + return 
validation if isinstance(validation, dict) else {} + + +def get_controlled_identifier_keys(service_name: str, fallback: Iterable[str]) -> frozenset[str]: + try: + validation = get_service_validation(service_name) + except Exception: + return frozenset(str(key).strip().lower() for key in fallback if str(key).strip()) + + keys = validation.get("controlled_identifier_keys") or [] + if not isinstance(keys, list): + return frozenset(str(key).strip().lower() for key in fallback if str(key).strip()) + + configured = { + str(key).strip().lower() + for key in keys + if str(key).strip() + } + if configured: + return frozenset(configured) + return frozenset(str(key).strip().lower() for key in fallback if str(key).strip()) diff --git a/mcp_server/study_agent_mcp/tools/cohort_methods_intent_split.py b/mcp_server/study_agent_mcp/tools/cohort_methods_intent_split.py new file mode 100644 index 0000000..5198faa --- /dev/null +++ b/mcp_server/study_agent_mcp/tools/cohort_methods_intent_split.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import json +import os +from typing import Any, Dict + +from ._common import with_meta + + +_CACHE: Dict[str, Dict[str, Any]] = {} + + +def _prompt_dir() -> str: + base = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "prompts", "phenotype")) + return base + + +def _load_text(path: str) -> str: + with open(path, "r", encoding="utf-8") as handle: + return handle.read().strip() + + +def _load_json(path: str) -> Dict[str, Any]: + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + +def _load_bundle() -> Dict[str, Any]: + cached = _CACHE.get("cohort_methods_intent_split") + if cached is not None: + return cached + base = _prompt_dir() + overview = _load_text(os.path.join(base, "overview_cohort_methods_intent_split.md")) + spec = _load_text(os.path.join(base, "spec_cohort_methods_intent_split.md")) + schema = _load_json(os.path.join(base, "output_schema_cohort_methods_intent_split.json")) + 
payload = { + "task": "cohort_methods_intent_split", + "overview": overview, + "spec": spec, + "output_schema": schema, + } + _CACHE["cohort_methods_intent_split"] = payload + return payload + + +def register(mcp: object) -> None: + @mcp.tool(name="cohort_methods_intent_split") + def cohort_methods_intent_split_tool() -> Dict[str, Any]: + payload = _load_bundle() + return with_meta(payload, "cohort_methods_intent_split") + + return None diff --git a/mcp_server/study_agent_mcp/tools/cohort_methods_prompt_bundle.py b/mcp_server/study_agent_mcp/tools/cohort_methods_prompt_bundle.py new file mode 100644 index 0000000..f494bb0 --- /dev/null +++ b/mcp_server/study_agent_mcp/tools/cohort_methods_prompt_bundle.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import json +import os +from typing import Any, Dict, Optional + +from ._common import with_meta + + +_CACHE: Optional[Dict[str, Any]] = None + + +def _prompt_dir() -> str: + return os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..", "prompts", "cohort_methods") + ) + + +def _analysis_template_path() -> str: + return os.path.join(_prompt_dir(), "cmAnalysis_template.json") + + +def _field_descriptions_path() -> str: + return os.path.join(_prompt_dir(), "CM_ANALYSIS_TEMPLATE.md") + + +def _instruction_template_path() -> str: + return os.path.join(_prompt_dir(), "instruction_cohort_methods_specs.md") + + +def _output_style_template_path() -> str: + return os.path.join(_prompt_dir(), "output_style_cohort_methods_specs.md") + + +def _load_text(path: str) -> str: + with open(path, "r", encoding="utf-8") as handle: + return handle.read().strip() + + +def _load_json(path: str) -> Dict[str, Any]: + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + +def _load_field_descriptions() -> str: + text = _load_text(_field_descriptions_path()) + marker = "## Top-Level Shape" + idx = text.find(marker) + if idx == -1: + raise ValueError(f"missing field description marker: 
{marker}") + return text[idx:].strip() + + +def _build_bundle() -> Dict[str, Any]: + defaults_spec = _load_json(_analysis_template_path()) + analysis_template = json.dumps(defaults_spec, indent=2) + return { + "instruction_template": _load_text(_instruction_template_path()), + "output_style_template": _load_text(_output_style_template_path()), + "annotated_template": analysis_template, + "analysis_specifications_template": analysis_template, + "json_field_descriptions": _load_field_descriptions(), + "defaults_spec": defaults_spec, + "schema_version": "v1.4.0", + } + + +def register(mcp: object) -> None: + global _CACHE + + @mcp.tool(name="cohort_methods_prompt_bundle") + def cohort_methods_prompt_bundle_tool() -> Dict[str, Any]: + global _CACHE + if _CACHE is None: + _CACHE = _build_bundle() + return with_meta(_CACHE, "cohort_methods_prompt_bundle") + + return None diff --git a/mcp_server/study_agent_mcp/tools/phenotype_fetch_definition.py b/mcp_server/study_agent_mcp/tools/phenotype_fetch_definition.py index ba10685..efb4aab 100644 --- a/mcp_server/study_agent_mcp/tools/phenotype_fetch_definition.py +++ b/mcp_server/study_agent_mcp/tools/phenotype_fetch_definition.py @@ -1,7 +1,5 @@ from __future__ import annotations -import json -import os from typing import Any, Dict from study_agent_mcp.retrieval import get_default_index @@ -24,23 +22,15 @@ def _truncate(obj: Any, depth: int = 0, max_depth: int = 4, max_list: int = 20, def register(mcp: object) -> None: @mcp.tool(name="phenotype_fetch_definition") def phenotype_fetch_definition_tool( - cohortId: int, + phenotype_id: str, truncate: bool = True, ) -> Dict[str, Any]: index = get_default_index() - definitions_dir = os.path.join(index.index_dir, "definitions") - path = os.path.join(definitions_dir, f"{int(cohortId)}.json") - if not os.path.exists(path): - payload = {"error": f"definition not found for cohortId {cohortId}"} + data = index.fetch_definition(str(phenotype_id)) + if data is None: + payload = {"error": 
f"definition not found for phenotype_id {phenotype_id}"} return with_meta(payload, "phenotype_fetch_definition") - with open(path, "r", encoding="utf-8") as handle: - try: - data = json.load(handle) - except json.JSONDecodeError: - payload = {"error": f"definition JSON invalid for cohortId {cohortId}"} - return with_meta(payload, "phenotype_fetch_definition") - if truncate: data = _truncate(data) payload = {"definition": data} diff --git a/mcp_server/study_agent_mcp/tools/phenotype_fetch_summary.py b/mcp_server/study_agent_mcp/tools/phenotype_fetch_summary.py index 4e7ddb5..15f4494 100644 --- a/mcp_server/study_agent_mcp/tools/phenotype_fetch_summary.py +++ b/mcp_server/study_agent_mcp/tools/phenotype_fetch_summary.py @@ -9,11 +9,11 @@ def register(mcp: object) -> None: @mcp.tool(name="phenotype_fetch_summary") - def phenotype_fetch_summary_tool(cohortId: int) -> Dict[str, Any]: + def phenotype_fetch_summary_tool(phenotype_id: str) -> Dict[str, Any]: index = get_default_index() - summary = index.fetch_summary(int(cohortId)) + summary = index.fetch_summary(str(phenotype_id)) if summary is None: - payload = {"error": f"cohortId {cohortId} not found"} + payload = {"error": f"phenotype_id {phenotype_id} not found"} else: payload = {"summary": summary} return with_meta(payload, "phenotype_fetch_summary") diff --git a/mcp_server/study_agent_mcp/tools/phenotype_list_similar.py b/mcp_server/study_agent_mcp/tools/phenotype_list_similar.py index 24418f1..804caf4 100644 --- a/mcp_server/study_agent_mcp/tools/phenotype_list_similar.py +++ b/mcp_server/study_agent_mcp/tools/phenotype_list_similar.py @@ -10,13 +10,13 @@ def register(mcp: object) -> None: @mcp.tool(name="phenotype_list_similar") def phenotype_list_similar_tool( - cohortId: int, + phenotype_id: str, top_k: int = 10, ) -> Dict[str, Any]: index = get_default_index() - results = index.list_similar(int(cohortId), top_k=top_k) + results = index.list_similar(str(phenotype_id), top_k=top_k) payload = { - "cohortId": 
int(cohortId), + "phenotype_id": str(phenotype_id), "results": results, "count": len(results), } diff --git a/mcp_server/study_agent_mcp/tools/phenotype_prompt_bundle.py b/mcp_server/study_agent_mcp/tools/phenotype_prompt_bundle.py index 4e2bfe5..d54e7b7 100644 --- a/mcp_server/study_agent_mcp/tools/phenotype_prompt_bundle.py +++ b/mcp_server/study_agent_mcp/tools/phenotype_prompt_bundle.py @@ -29,16 +29,29 @@ def _load_bundle(task: str) -> Dict[str, Any]: cached = _CACHE.get(task) if cached is not None: return cached - if task not in ("phenotype_recommendations", "phenotype_improvements", "cohort_critique_general_design"): + if task not in ( + "phenotype_recommendation_intent_facets", + "phenotype_recommendation_plan", + "phenotype_recommendations", + "phenotype_improvements", + "cohort_critique_general_design", + ): return {"error": f"unsupported task {task}"} base = _prompt_dir() if task == "cohort_critique_general_design": overview = _load_text(os.path.join(os.path.dirname(base), "lint", "overview_lint.md")) spec = _load_text(os.path.join(os.path.dirname(base), "lint", "spec_cohort_critique.md")) schema = _load_json(os.path.join(os.path.dirname(base), "lint", "output_schema_cohort_critique_general_design.json")) + elif task == "phenotype_recommendation_intent_facets": + overview = _load_text(os.path.join(base, "overview_phenotype_recommendation_intent_facets.md")) + spec = _load_text(os.path.join(base, "spec_phenotype_recommendation_intent_facets.md")) + schema = _load_json(os.path.join(base, "output_schema_phenotype_recommendation_intent_facets.json")) else: overview = _load_text(os.path.join(base, "overview_phenotype.md")) - if task == "phenotype_improvements": + if task == "phenotype_recommendation_plan": + spec = _load_text(os.path.join(base, "spec_phenotype_recommendation_plan.md")) + schema = _load_json(os.path.join(base, "output_schema_phenotype_recommendation_plan.json")) + elif task == "phenotype_improvements": spec = _load_text(os.path.join(base, 
"spec_phenotype_improvements.md")) schema = _load_json(os.path.join(base, "output_schema_phenotype_improvements.json")) elif task == "phenotype_recommendations": diff --git a/mcp_server/study_agent_mcp/tools/phenotype_search.py b/mcp_server/study_agent_mcp/tools/phenotype_search.py index 9594bec..d314403 100644 --- a/mcp_server/study_agent_mcp/tools/phenotype_search.py +++ b/mcp_server/study_agent_mcp/tools/phenotype_search.py @@ -21,8 +21,8 @@ def phenotype_search_tool( dense_weight: Optional[float] = None, sparse_weight: Optional[float] = None, ) -> Dict[str, Any]: - default_dense_weight = float(os.getenv("PHENOTYPE_DENSE_WEIGHT", "0.9")) - default_sparse_weight = float(os.getenv("PHENOTYPE_SPARSE_WEIGHT", "0.1")) + default_dense_weight = float(os.getenv("PHENOTYPE_DENSE_WEIGHT", "0.7")) + default_sparse_weight = float(os.getenv("PHENOTYPE_SPARSE_WEIGHT", "0.3")) if dense_weight is None: dense_weight = default_dense_weight if sparse_weight is None: diff --git a/scripts/test_phenotype_recommendations.R b/scripts/test_phenotype_recommendations.R index b1f34c2..daffd16 100755 --- a/scripts/test_phenotype_recommendations.R +++ b/scripts/test_phenotype_recommendations.R @@ -5,17 +5,18 @@ # Import the R thin api to the ACP server/bridge devtools::load_all("OHDSI-Study-Agent/R/OHDSIAssistant") +Sys.setenv(PHENOTYPE_INDEX_DIR="OHDSI-Study-Agent/data/phenotype_index_cipher_omop") + # confirm the ACP server/bridge is running OHDSIAssistant::acp_connect("http://127.0.0.1:8765") - ############################################################ ## -- `phenotype_recommendations` (ACP flow) protocol <- "OHDSI-Study-Agent/demo/protocol.md" study_dir <- "OHDSI-Study-Agent/demo" -rec <- OHDSIAssistant::suggestPhenotypes(protocolPath = protocol, maxResults = 10, candidateLimit = 10, interactive = TRUE) +rec <- OHDSIAssistant::suggestPhenotypes(protocolPath = protocol, maxResults = 10, candidateLimit = 20, interactive = TRUE) core <- rec$recommendations %||% rec ids <- 
OHDSIAssistant::selectPhenotypeRecommendations(core$phenotype_recommendations, select = NULL, interactive = interactive()) # this will write the JSON for the selected cohort definitions to a folder diff --git a/scripts/test_strategus_incidence_plus_keeper.R b/scripts/test_strategus_incidence_plus_keeper.R index 767d54e..7dbad46 100644 --- a/scripts/test_strategus_incidence_plus_keeper.R +++ b/scripts/test_strategus_incidence_plus_keeper.R @@ -7,8 +7,7 @@ ### CLEAN UP FROM LAST RUN? # Uncomment to reset the state of the output folder # Or add `reset = TRUE ` to the function call -#unlink("OHDSI-Study-Agent/demo-strategus-cohort-incidence", recursive = TRUE, force = TRUE) - +unlink("OHDSI-Study-Agent/demo-strategus-cohort-incidence", recursive = TRUE, force = TRUE) # Import the R thin api to the ACP server/bridge Sys.setenv(ACP_TIMEOUT = "280") @@ -19,18 +18,20 @@ OHDSIAssistant::acp_connect("http://127.0.0.1:8765") ## Run an interactive agent "shell" -## First enter this study intent which does not really return relevant phenotype definitions: +## (NO RELEVANT PHENOTYPE TEST) First enter this study intent which does not really return relevant phenotype definitions: ## "What is the risk of GI bleed in new users of Celecoxib compared to new users of Diclofenac?" 
OHDSIAssistant::runStrategusIncidenceShell( outputDir = "demo-strategus-cohort-incidence", - studyAgentBaseDir = "OHDSI-Study-Agent" + studyAgentBaseDir = "OHDSI-Study-Agent", + indexDir="data/phenotype_index_cipher_omop/" ) -## Rerun the study agent with a study intent that does have relevant phenotype definitions: +## (RELEVANT PHENOTYPE TEST) Run the study agent with a study intent that does have relevant phenotype definitions: OHDSIAssistant::runStrategusIncidenceShell( outputDir = "demo-strategus-cohort-incidence", studyAgentBaseDir = "OHDSI-Study-Agent", + indexDir="data/phenotype_index_cipher_omop/", studyIntent = "What is the risk of GI bleed in new users of tofacitinib compared to new users of ruxolitinib?" ) @@ -42,5 +43,6 @@ OHDSIAssistant::runStrategusIncidenceShell( resume = TRUE, allowCache = TRUE, promptOnCache = FALSE, - interactive = FALSE + interactive = FALSE, + indexDir="data/phenotype_index_cipher_omop/" ) diff --git a/tests/cohort_methods_intent_split_smoke_test.py b/tests/cohort_methods_intent_split_smoke_test.py new file mode 100644 index 0000000..ed9b367 --- /dev/null +++ b/tests/cohort_methods_intent_split_smoke_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import json +import os +import sys +import urllib.error +import urllib.request + +ACP_URL = os.getenv( + "ACP_URL", + "http://127.0.0.1:8765/flows/cohort_methods_intent_split", +) +ACP_TIMEOUT = int(os.getenv("ACP_TIMEOUT", "180")) + +STUDY_INTENT = ( + "Compare new users of sitagliptin versus new users of glipizide for acute myocardial " + "infarction in adults with type 2 diabetes." 
+) + + +def main() -> int: + payload = { + "study_intent": STUDY_INTENT, + } + body = json.dumps(payload).encode("utf-8") + request = urllib.request.Request(ACP_URL, data=body, method="POST") + request.add_header("Content-Type", "application/json") + + try: + with urllib.request.urlopen(request, timeout=ACP_TIMEOUT) as response: + raw = response.read().decode("utf-8") + except urllib.error.HTTPError as exc: + raw = exc.read().decode("utf-8", errors="replace") + print(raw) + return 1 + + print(raw) + result = json.loads(raw) + assert result.get("status") == "ok", result + + intent_split = result.get("intent_split") or {} + assert intent_split.get("status") in {"ok", "needs_clarification"}, intent_split + if intent_split.get("status") == "ok": + assert intent_split.get("target_statement"), "target_statement must be non-empty" + assert intent_split.get("comparator_statement"), "comparator_statement must be non-empty" + assert intent_split.get("outcome_statement"), "outcome_statement must be non-empty" + assert intent_split.get("outcome_statements"), "outcome_statements must be non-empty" + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/cohort_methods_specs_flow_smoke_test.py b/tests/cohort_methods_specs_flow_smoke_test.py new file mode 100644 index 0000000..79eb251 --- /dev/null +++ b/tests/cohort_methods_specs_flow_smoke_test.py @@ -0,0 +1,59 @@ +"""Live ACP + MCP smoke test for the cohort methods specs flow. + +Requires the ACP server to be running at http://127.0.0.1:8765 with MCP +reachable and LLM credentials configured. Invoked by +`doit smoke_cohort_methods_specs_recommend_flow`. +""" +from __future__ import annotations + +import json +import sys +import urllib.error +import urllib.request + + +URL = "http://127.0.0.1:8765/flows/cohort_methods_specifications_recommendation" + +DESCRIPTION = ( + "Compare sitagliptin new users vs glipizide new users for acute myocardial " + "infarction. 
Use a 365-day washout, intent-to-treat follow-up, 1:1 propensity " + "score matching on standardized logit with a caliper of 0.2, and a Cox model." +) + +REQUEST_BODY = { + "analytic_settings_description": DESCRIPTION, + "study_description": DESCRIPTION, + "study_intent": "Comparative effectiveness study on CV outcomes.", +} + + +def main() -> int: + req = urllib.request.Request( + URL, + data=json.dumps(REQUEST_BODY).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=240) as resp: + body = resp.read().decode("utf-8") + except urllib.error.HTTPError as exc: + print(f"HTTPError {exc.code}: {exc.read().decode('utf-8', errors='replace')}") + return 2 + + result = json.loads(body) + print("status:", result.get("status")) + rec = result.get("recommendation") or {} + print("recommendation.status:", rec.get("status")) + print("profile_name:", rec.get("profile_name")) + print("failed_sections:", result.get("diagnostics", {}).get("failed_sections")) + for section, entry in (result.get("section_rationales") or {}).items(): + print(f" {section}: {entry.get('confidence')} {entry.get('rationale')}") + + assert result.get("status") in {"ok", "schema_validation_error", "llm_parse_error"}, result + assert rec.get("raw_description"), "recommendation.raw_description must be non-empty" + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_acp_cohort_methods_flow.py b/tests/test_acp_cohort_methods_flow.py new file mode 100644 index 0000000..41bf05e --- /dev/null +++ b/tests/test_acp_cohort_methods_flow.py @@ -0,0 +1,164 @@ +import json +from typing import Any, Dict +from unittest.mock import MagicMock + +import pytest + +from study_agent_acp.agent import StudyAgent + + +pytestmark = pytest.mark.acp + + +def _annotated_template() -> str: + import os + here = os.path.dirname(os.path.abspath(__file__)) + path = os.path.abspath(os.path.join(here, "..", "mcp_server", 
"prompts", "cohort_methods", "cmAnalysis_template.json")) + with open(path, "r", encoding="utf-8") as fh: + return fh.read() + + +def _defaults_spec() -> Dict[str, Any]: + return json.loads(_annotated_template()) + + +def _make_bundle_payload() -> Dict[str, Any]: + return { + "status": "ok", + "full_result": { + "instruction_template": "...", + "output_style_template": "...", + "annotated_template": _annotated_template(), + "analysis_specifications_template": _annotated_template(), + "json_field_descriptions": "## Top-Level Shape\n...", + "defaults_spec": _defaults_spec(), + "schema_version": "v1.4.0", + }, + } + + +def _make_llm_result(content: Dict[str, Any], status: str = "ok") -> MagicMock: + m = MagicMock() + m.status = status + m.duration_seconds = 1.23 + m.error = None + m.parse_stage = "ok" if status == "ok" else "json_decode_failed" + m.schema_valid = True if status == "ok" else False + m.request_mode = "structured" + m.missing_keys = [] + m.raw_response = json.dumps(content) if status == "ok" else "" + m.content_text = m.raw_response + m.parsed_content = content if status == "ok" else None + return m + + +def _valid_llm_payload(defaults: Dict[str, Any]) -> Dict[str, Any]: + spec = json.loads(json.dumps(defaults)) + spec["description"] = "Example" + spec["getDbCohortMethodDataArgs"]["washoutPeriod"] = 365 + return { + "specifications": spec, + "sectionRationales": { + "study_population": {"rationale": "washout lengthened", "confidence": "high"}, + "time_at_risk": {"rationale": "risk window kept", "confidence": "medium"}, + "propensity_score_adjustment": {"rationale": "defaults", "confidence": "medium"}, + "outcome_model": {"rationale": "defaults", "confidence": "medium"}, + }, + } + + +def _build_agent_with_mocks(bundle_payload: Dict[str, Any], llm_result) -> StudyAgent: + agent = StudyAgent.__new__(StudyAgent) + agent._mcp_client = MagicMock() + agent.call_tool = MagicMock(return_value=bundle_payload) + agent._call_llm = MagicMock(return_value=llm_result) 
+ agent.debug = False + return agent + + +def test_happy_path_returns_shell_shape() -> None: + defaults = _defaults_spec() + agent = _build_agent_with_mocks(_make_bundle_payload(), _make_llm_result(_valid_llm_payload(defaults))) + result = agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description="compare A vs B with 1-year washout", + ) + assert result["status"] == "ok" + rec = result["recommendation"] + assert rec["mode"] == "free_text" + assert rec["source"] == "acp_flow" + assert rec["status"] == "received" + assert rec["profile_name"] == "Example" + assert rec["raw_description"] == "compare A vs B with 1-year washout" + assert rec["study_population"]["cohortMethodDataArgs"]["washoutPeriod"] == 365 + assert rec["defaults_snapshot"] == {} + assert "section_rationales" in result + assert result["section_rationales"]["study_population"]["confidence"] == "high" + assert result["section_rationales"]["time_at_risk"]["confidence"] == "medium" + assert result["section_rationales"]["propensity_score_adjustment"]["confidence"] == "medium" + assert result["section_rationales"]["outcome_model"]["confidence"] == "medium" + prompt = agent._call_llm.call_args.args[0] + assert "" not in prompt + assert "" in prompt + assert "" in prompt + assert "## Top-Level Shape" in prompt + + +def test_llm_cohort_definitions_are_passed_through_without_request_metadata() -> None: + defaults = _defaults_spec() + drifted = _valid_llm_payload(defaults) + drifted["specifications"]["cohortDefinitions"] = {"targetCohort": {"id": 666, "name": "LLM supplied"}} + agent = _build_agent_with_mocks(_make_bundle_payload(), _make_llm_result(drifted)) + result = agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description="desc", + ) + assert result["cohort_methods_specifications"]["cohortDefinitions"]["targetCohort"]["id"] == 666 + + +def test_llm_parse_error_returns_defaults_fallback() -> None: + bad = _make_llm_result({}, status="error") + 
bad.parsed_content = None + bad.content_text = "this is not json" + bad.raw_response = "this is not json" + agent = _build_agent_with_mocks(_make_bundle_payload(), bad) + result = agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description="desc", + ) + assert result["status"] == "llm_parse_error" + assert result["recommendation"]["status"] == "backfilled" + assert result["diagnostics"]["llm_parse_stage"] in {"json_extract_failed", "json_decode_failed"} + + +def test_section_schema_violation_backfills_with_low_confidence() -> None: + defaults = _defaults_spec() + payload = _valid_llm_payload(defaults) + payload["specifications"]["fitOutcomeModelArgs"] = { + "modelType": "svm", "stratified": False, "useCovariates": False, + "inversePtWeighting": False, "prior": None, "control": None, + } + agent = _build_agent_with_mocks(_make_bundle_payload(), _make_llm_result(payload)) + result = agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description="desc", + ) + assert result["status"] == "ok" + assert "fitOutcomeModelArgs" in result["diagnostics"]["failed_sections"] + assert result["recommendation"]["status"] == "backfilled" + assert result["recommendation"]["outcome_model"]["modelType"] == "cox" + assert result["section_rationales"]["outcome_model"]["confidence"] == "low" + + +def test_missing_description_errors_out() -> None: + agent = _build_agent_with_mocks(_make_bundle_payload(), _make_llm_result({})) + result = agent.run_cohort_methods_specs_recommendation_flow( + analytic_settings_description="", + ) + assert result["status"] == "llm_parse_error" + assert "analytic_settings_description" in json.dumps(result["diagnostics"]) + + +def test_mcp_bundle_failure_raises() -> None: + bundle_fail = {"status": "error", "error": "bundle unavailable"} + agent = _build_agent_with_mocks(bundle_fail, _make_llm_result({})) + with pytest.raises(RuntimeError): + agent.run_cohort_methods_specs_recommendation_flow( + 
analytic_settings_description="desc", + ) diff --git a/tests/test_acp_cohort_methods_route.py b/tests/test_acp_cohort_methods_route.py new file mode 100644 index 0000000..4ac45eb --- /dev/null +++ b/tests/test_acp_cohort_methods_route.py @@ -0,0 +1,15 @@ +import json +from typing import Any, Dict +from unittest.mock import MagicMock + +import pytest + +from study_agent_acp.server import SERVICES + + +pytestmark = pytest.mark.acp + + +def test_service_registry_includes_new_flow() -> None: + endpoints = {entry["name"]: entry["endpoint"] for entry in SERVICES} + assert endpoints.get("cohort_methods_specifications_recommendation") == "/flows/cohort_methods_specifications_recommendation" diff --git a/tests/test_acp_phenotype_flow.py b/tests/test_acp_phenotype_flow.py index 85d90dc..d9dc47c 100644 --- a/tests/test_acp_phenotype_flow.py +++ b/tests/test_acp_phenotype_flow.py @@ -17,31 +17,87 @@ def call_tool(self, name, arguments): if name == "phenotype_search": return { "results": [ - {"cohortId": 1, "name": "Alpha", "short_description": "A"}, - {"cohortId": 2, "name": "Beta", "short_description": "B"}, + { + "phenotype_id": "ohdsi:1", + "name": "Alpha", + "short_description": "A", + "executable_definition_status": "native_ohdsi", + "execution_readiness_score": 1.0, + }, + { + "phenotype_id": "cipher:2", + "name": "Beta", + "short_description": "B", + "executable_definition_status": "codes_only", + "execution_readiness_score": 0.45, + }, ] } if name == "phenotype_prompt_bundle": + task = arguments["task"] return { - "overview": "overview", - "spec": "spec", - "output_schema": {"type": "object"}, + "overview": f"overview {task}", + "spec": f"spec {task}", + "output_schema": {"type": "object", "title": task}, } - raise ValueError("unexpected tool") + if name == "phenotype_fetch_summary": + phenotype_id = arguments["phenotype_id"] + if phenotype_id == "ohdsi:1": + return { + "content": { + "phenotype_id": "ohdsi:1", + "name": "Alpha", + "short_description": "A", + 
"retrieval_keywords": ["alpha diagnosis"], + "retrieval_concept_labels": ["Alpha condition"], + "methodology_summary": "Native OHDSI cohort.", + } + } + if phenotype_id == "cipher:2": + return { + "content": { + "phenotype_id": "cipher:2", + "name": "Beta", + "short_description": "B", + "retrieval_keywords": ["beta phenotype"], + "retrieval_concept_labels": ["Beta concept"], + "methodology_summary": "CIPHER code-based phenotype.", + } + } + raise ValueError(f"unexpected tool {name}") @pytest.mark.acp def test_acp_flow_candidate_limit(monkeypatch): - def fake_llm(prompt): + llm_calls = [] + + def fake_llm(prompt, required_keys=None): + llm_calls.append((prompt, tuple(required_keys or []))) + if len(llm_calls) == 1: + return { + "plan": "Extract recommendation intent facets.", + "intent_facets": {"phenotype_role": "diagnosis", "condition_or_topic": "test"}, + "reasoning_notes": ["Use diagnosis-focused interpretation."], + } + if len(llm_calls) == 2: + return { + "plan": "Shortlist top executable candidate.", + "intent_facets": {"phenotype_role": "diagnosis"}, + "shortlist_ids": ["ohdsi:1"], + "needs_more_search": False, + "reasoning_notes": ["Need native OHDSI option."], + } return { + "plan": "Recommend hydrated candidate.", "phenotype_recommendations": [ - {"cohortId": 1, "cohortName": "Alpha", "justification": "ok"} - ] + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha", "justification": "ok"} + ], } monkeypatch.setattr(agent_module, "call_llm", fake_llm) - agent = StudyAgent(mcp_client=StubMCPClient()) + client = StubMCPClient() + agent = StudyAgent(mcp_client=client) result = agent.run_phenotype_recommendation_flow( study_intent="test intent", top_k=5, @@ -55,13 +111,86 @@ def fake_llm(prompt): assert result["llm_status"] == "ok" assert result["fallback_reason"] is None assert result["diagnostics"]["llm_schema_valid"] is True + assert result["planning"]["shortlist_ids"] == ["ohdsi:1"] recs = result["recommendations"]["phenotype_recommendations"] assert 
len(recs) == 1 + assert recs[0]["phenotype_id"] == "ohdsi:1" + prompt_bundle_tasks = [args["task"] for name, args in client.calls if name == "phenotype_prompt_bundle"] + assert prompt_bundle_tasks == [ + "phenotype_recommendation_intent_facets", + "phenotype_recommendation_plan", + "phenotype_recommendations", + ] + assert result["intent_facets"]["intent_facets"]["phenotype_role"] == "diagnosis" + fetch_ids = [args["phenotype_id"] for name, args in client.calls if name == "phenotype_fetch_summary"] + assert fetch_ids == ["ohdsi:1", "cipher:2", "ohdsi:1"] @pytest.mark.acp -def test_acp_flow_parse_failure_returns_explicit_fallback(monkeypatch): +def test_acp_flow_plan_parse_failure_uses_stub_shortlist(monkeypatch): + llm_calls = [] + def fake_llm(prompt, required_keys=None): + llm_calls.append((prompt, tuple(required_keys or []))) + if len(llm_calls) == 1: + return { + "plan": "Extract recommendation intent facets.", + "intent_facets": {"phenotype_role": "diagnosis", "condition_or_topic": "test"}, + "reasoning_notes": ["Use diagnosis-focused interpretation."], + } + if len(llm_calls) == 2: + return LLMCallResult( + status="json_parse_failed", + error="json_parse_failed", + parse_stage="chat_completions_content:json_loads", + duration_seconds=1.0, + request_mode="chat_completions", + content_text='{"plan": ', + ) + return { + "plan": "Recommend fallback-shortlisted candidate.", + "phenotype_recommendations": [ + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha", "justification": "ok"} + ], + } + + monkeypatch.setattr(agent_module, "call_llm", fake_llm) + + client = StubMCPClient() + agent = StudyAgent(mcp_client=client) + result = agent.run_phenotype_recommendation_flow( + study_intent="test intent", + top_k=5, + max_results=3, + candidate_limit=1, + ) + assert result["status"] == "ok" + assert result["planning"]["mode"] == "stub" + assert result["planning"]["shortlist_ids"] == ["ohdsi:1"] + assert result["llm_used"] is True + assert 
result["diagnostics"]["planning"]["llm_status"] == "json_parse_failed" + + +@pytest.mark.acp +def test_acp_flow_final_parse_failure_returns_explicit_fallback(monkeypatch): + llm_calls = [] + + def fake_llm(prompt, required_keys=None): + llm_calls.append((prompt, tuple(required_keys or []))) + if len(llm_calls) == 1: + return { + "plan": "Extract recommendation intent facets.", + "intent_facets": {"phenotype_role": "diagnosis", "condition_or_topic": "test"}, + "reasoning_notes": ["Use diagnosis-focused interpretation."], + } + if len(llm_calls) == 2: + return { + "plan": "Shortlist both.", + "intent_facets": {"phenotype_role": "diagnosis"}, + "shortlist_ids": ["ohdsi:1", "cipher:2"], + "needs_more_search": False, + "reasoning_notes": ["Compare executable and non-executable options."], + } return LLMCallResult( status="json_parse_failed", error="json_parse_failed", @@ -86,4 +215,124 @@ def fake_llm(prompt, required_keys=None): assert result["fallback_reason"] == "llm_json_parse_failed" assert result["fallback_mode"] == "stub" assert result["diagnostics"]["llm_parse_stage"] == "chat_completions_content:json_loads" + assert result["diagnostics"]["planning"]["llm_status"] == "ok" assert result["recommendations"]["mode"] == "stub" + + +@pytest.mark.acp +def test_acp_flow_reranks_planning_candidates_by_metadata(monkeypatch): + llm_calls = [] + + class MetadataStubMCPClient(StubMCPClient): + def call_tool(self, name, arguments): + self.calls.append((name, arguments)) + if name == "phenotype_search": + return { + "results": [ + { + "phenotype_id": "ohdsi:wrong", + "name": "AAA Repair", + "short_description": "Procedure phenotype", + "score": 10.0, + "executable_definition_status": "native_ohdsi", + "execution_readiness_score": 1.0, + }, + { + "phenotype_id": "cipher:right", + "name": "Abdominal Aortic Aneurysm Diagnosis", + "short_description": "Diagnosis phenotype", + "score": 9.0, + "executable_definition_status": "codes_only", + "execution_readiness_score": 0.45, + }, + 
] + } + if name == "phenotype_prompt_bundle": + task = arguments["task"] + return { + "overview": f"overview {task}", + "spec": f"spec {task}", + "output_schema": {"type": "object", "title": task}, + } + if name == "phenotype_fetch_summary": + phenotype_id = arguments["phenotype_id"] + if phenotype_id == "ohdsi:wrong": + return { + "content": { + "phenotype_id": "ohdsi:wrong", + "name": "AAA Repair", + "short_description": "Procedure phenotype", + "primary_clinical_topic": "abdominal aortic aneurysm repair", + "phenotype_role": "procedure", + "care_setting_scope": "inpatient", + "population_scope": "adults", + "target_vs_context_conditions": {"target": ["abdominal aortic aneurysm"], "context": ["post-op atrial fibrillation"]}, + "exclude_from_primary_topic_match": ["procedure", "post-op"], + "recommendation_summary": "Procedure cohort after AAA repair.", + } + } + if phenotype_id == "cipher:right": + return { + "content": { + "phenotype_id": "cipher:right", + "name": "Abdominal Aortic Aneurysm Diagnosis", + "short_description": "Diagnosis phenotype", + "primary_clinical_topic": "abdominal aortic aneurysm", + "phenotype_role": "diagnosis", + "care_setting_scope": "any", + "population_scope": "veterans", + "target_vs_context_conditions": {"target": ["abdominal aortic aneurysm"]}, + "exclude_from_primary_topic_match": [], + "recommendation_summary": "Core AAA diagnosis phenotype.", + } + } + raise ValueError(f"unexpected tool {name}") + + def fake_llm(prompt, required_keys=None): + llm_calls.append((prompt, tuple(required_keys or []))) + if len(llm_calls) == 1: + return { + "plan": "Extract recommendation intent facets.", + "intent_facets": { + "condition_or_topic": "abdominal aortic aneurysm", + "phenotype_role": "diagnosis", + "care_setting": "any", + "population_cue": "veterans", + }, + "reasoning_notes": ["Prefer diagnosis phenotype."], + } + if len(llm_calls) == 2: + assert prompt.index('"phenotype_id": "cipher:right"') < prompt.index('"phenotype_id": 
"ohdsi:wrong"') + return { + "plan": "Shortlist diagnosis candidate first.", + "intent_facets": {"phenotype_role": "diagnosis"}, + "shortlist_ids": ["cipher:right"], + "needs_more_search": False, + "reasoning_notes": ["Diagnosis metadata outranks procedure metadata."], + } + return { + "plan": "Recommend diagnosis candidate.", + "phenotype_recommendations": [ + {"phenotype_id": "cipher:right", "phenotype_name": "Abdominal Aortic Aneurysm Diagnosis", "justification": "ok"} + ], + } + + monkeypatch.setattr(agent_module, "call_llm", fake_llm) + + agent = StudyAgent(mcp_client=MetadataStubMCPClient()) + result = agent.run_phenotype_recommendation_flow( + study_intent="veterans who experienced an abdominal aortic aneurysm", + top_k=5, + max_results=3, + candidate_limit=1, + ) + assert result["status"] == "ok" + assert result["planning"]["shortlist_ids"] == ["cipher:right"] + assert result["recommendations"]["phenotype_recommendations"][0]["phenotype_id"] == "cipher:right" + rerank = result["diagnostics"]["planning_rerank"] + assert rerank["candidate_count"] == 2 + assert rerank["candidates"][0]["phenotype_id"] == "cipher:right" + assert rerank["candidates"][1]["phenotype_id"] == "ohdsi:wrong" + assert rerank["candidates"][0]["metadata_score"] > rerank["candidates"][1]["metadata_score"] + assert any(reason["kind"] == "role_match" for reason in rerank["candidates"][0]["reasons"]) + assert any(reason["kind"] == "exclude_procedure" for reason in rerank["candidates"][1]["reasons"]) diff --git a/tests/test_acp_server.py b/tests/test_acp_server.py index 42afebc..339d207 100644 --- a/tests/test_acp_server.py +++ b/tests/test_acp_server.py @@ -124,6 +124,8 @@ def call_tool(self, name, arguments): return {"overview": "overview", "spec": "spec", "output_schema": {"type": "object"}} if name == "phenotype_intent_split": return {"overview": "overview", "spec": "spec", "output_schema": {"type": "object"}} + if name == "cohort_methods_intent_split": + return {"overview": "overview", 
"spec": "spec", "output_schema": {"type": "object"}} if name == "lint_prompt_bundle": return {"overview": "overview", "spec": "spec", "output_schema": {"type": "object"}} if name == "keeper_sanitize_row": @@ -735,6 +737,93 @@ def call_tool(self, name, arguments): assert result["error"] == "phenotype_intent_split_prompt_failed" +@pytest.mark.acp +def test_flow_cohort_methods_intent_split(monkeypatch): + import study_agent_acp.agent as agent_module + + captured = {} + + def fake_llm(prompt, required_keys=None): + captured["prompt"] = prompt + captured["required_keys"] = required_keys + return { + "status": "ok", + "plan": "plan", + "target_statement": "Metformin users", + "comparator_statement": "Sulfonylurea users", + "outcome_statement": "GI bleeding", + "outcome_statements": ["GI bleeding", "Stroke"], + "rationale": "Rationale", + "questions": [], + } + + monkeypatch.setattr(agent_module, "call_llm", fake_llm) + agent = StudyAgent(mcp_client=StubMCPClient()) + result = agent.run_cohort_methods_intent_split_flow( + study_intent="Compare metformin versus sulfonylurea on GI bleeding.", + ) + assert result["status"] == "ok" + assert result["intent_split"]["target_statement"] == "Metformin users" + assert result["intent_split"]["comparator_statement"] == "Sulfonylurea users" + assert result["intent_split"]["outcome_statements"] == ["GI bleeding", "Stroke"] + assert "cohort_methods_intent_split" in captured.get("prompt", "") + assert "comparator_statement" in captured.get("required_keys", []) + assert "outcome_statements" in captured.get("required_keys", []) + + +@pytest.mark.acp +def test_flow_cohort_methods_intent_split_schema_mismatch(monkeypatch): + import study_agent_acp.agent as agent_module + + def fake_llm(prompt, required_keys=None): + return LLMCallResult( + status="schema_mismatch", + parsed_content={"target_statement": "Target only"}, + parse_stage="chat_completions_content:schema", + 
error="missing_required_keys:comparator_statement,outcome_statements,rationale", + missing_keys=["comparator_statement", "outcome_statements", "rationale"], + schema_valid=False, + request_mode="chat_completions", + ) + + monkeypatch.setattr(agent_module, "call_llm", fake_llm) + agent = StudyAgent(mcp_client=StubMCPClient()) + result = agent.run_cohort_methods_intent_split_flow( + study_intent="Intent text", + ) + assert result["status"] == "error" + assert result["error"] == "llm_unavailable" + assert result["diagnostics"]["llm_missing_keys"] == [ + "comparator_statement", + "outcome_statements", + "rationale", + ] + + +@pytest.mark.acp +def test_flow_cohort_methods_intent_split_missing_intent(): + agent = StudyAgent(mcp_client=StubMCPClient()) + result = agent.run_cohort_methods_intent_split_flow(study_intent="") + assert result["status"] == "error" + assert result["error"] == "missing study_intent" + + +@pytest.mark.acp +def test_flow_cohort_methods_intent_split_prompt_bundle_error(): + class BadMCPClient(StubMCPClient): + def call_tool(self, name, arguments): + if name == "cohort_methods_intent_split": + return {"error": "bad prompt"} + return super().call_tool(name, arguments) + + agent = StudyAgent(mcp_client=BadMCPClient()) + result = agent.run_cohort_methods_intent_split_flow( + study_intent="Intent text", + ) + assert result["status"] == "error" + assert result["error"] == "cohort_methods_intent_split_prompt_failed" + + @pytest.mark.acp def test_flow_case_causal_review(monkeypatch): diff --git a/tests/test_build_phenotype_index.py b/tests/test_build_phenotype_index.py new file mode 100644 index 0000000..d2f538b --- /dev/null +++ b/tests/test_build_phenotype_index.py @@ -0,0 +1,290 @@ +import importlib.util +import json +from pathlib import Path + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "mcp_server" / "scripts" / "build_phenotype_index.py" +SPEC = importlib.util.spec_from_file_location("build_phenotype_index", MODULE_PATH) +assert SPEC and 
SPEC.loader +builder = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(builder) + + +def test_build_ohdsi_row_adds_concept_evidence_and_retrieval_fields() -> None: + definition_path = Path("data/cohorts/2.json") + definition = json.loads(definition_path.read_text(encoding="utf-8")) + meta = { + "cohortId": "2", + "cohortName": "COVID-19 positive test or diagnosis", + "logicDescription": "Persons with a COVID-19 condition or positive SARS-CoV-2 test.", + "notes": "Uses diagnosis or measurement entry criteria.", + "hashTag": "covid-19, infection", + "recommendedReferentConceptIds": "37311061;756055", + "numberOfInclusionRules": "0", + "numberOfConceptSets": "2", + "domainsInEntryEvents": "Condition;Measurement", + "status": "Published", + "demographicCriteriaGender": "Any", + "createdDate": "2020-01-01", + "modifiedDate": "2020-01-02", + "addedVersion": "v1", + "librarian": "Test Librarian", + } + + row = builder._build_ohdsi_row(meta, definition) + + assert row["raw_keywords"] == [] + assert "native OHDSI cohort" in row["retrieval_keywords"] + assert "COVID-19" in row["retrieval_concept_labels"] + assert "HCPCS" in row["retrieval_concept_labels"] + assert row["methodology_summary"] == "Native OHDSI cohort with 2 concept sets and 0 inclusion rules." 
+ assert row["concept_evidence"]["coverage_summary"] == { + "has_codes": True, + "has_labels": True, + "has_omop_mapping": True, + } + vocab_names = {item["system_name"] for item in row["code_systems"]} + assert "SNOMED" in vocab_names + assert "HCPCS" in vocab_names + assert "HCPCS" in row["retrieval_text"] + + +def test_build_cipher_row_preserves_raw_keywords_and_derived_labels() -> None: + cipher_path = Path("data/cipher-phenotypes/Abdominal aortic aneurysm (MAP).json") + enum_path = Path("data/cipher-phenotypes/enumType 1.json") + record = json.loads(cipher_path.read_text(encoding="utf-8")) + enum_map = builder._load_cipher_enum_map(str(enum_path)) + + row = builder._build_cipher_row(str(cipher_path), record, enum_map) + + assert row["tags"] == ["General"] + assert row["raw_keywords"] == [] + assert "ICD-9 Diagnostic Codes" in row["retrieval_concept_labels"] + assert "MAP" in row["retrieval_keywords"] + assert row["methodology_summary"].startswith("MAP is an unsupervised clustering algorithm") + assert row["concept_evidence"]["coverage_summary"]["has_codes"] is True + assert row["concept_evidence"]["coverage_summary"]["has_labels"] is True + assert any(item["system_name"] == "ICD-10 Diagnostic Codes" for item in row["code_systems"]) + + +def _sample_row() -> dict: + return { + "phenotype_id": "cipher:test-1", + "source_dataset": "va_cipher", + "name": "Post-traumatic stress disorder", + "short_description": "PTSD phenotype for veterans.", + "long_description": "Derived phenotype with ICD and narrative evidence.", + "tags": ["General"], + "raw_keywords": ["veteran"], + "retrieval_keywords": ["General", "veteran", "ICD-10 Diagnostic Codes", "PTSD"], + "retrieval_concept_labels": ["ICD-10 Diagnostic Codes", "PTSD"], + "methodology_summary": "Codes and narrative evidence for PTSD.", + "ontology_keys": [], + "signals": ["source:cipher", "execution:codes_only", "method_family:map"], + "executable_definition_status": "codes_only", + "adaptation_notes": "Requires 
translation to OHDSI logic.", + "primary_clinical_topic": "Post-traumatic stress disorder", + "secondary_topics": [], + "phenotype_role": "unknown", + "care_setting_scope": "unspecified", + "population_scope": "", + "topic_mentions": {"primary_topics": ["Post-traumatic stress disorder"], "context_only_topics": [], "downstream_or_related_topics": []}, + "target_vs_context_conditions": {"target_conditions": ["Post-traumatic stress disorder"], "context_conditions": []}, + "exclude_from_primary_topic_match": [], + "recommendation_summary": "PTSD phenotype for veterans.", + "recommendation_metadata_source": "heuristic", + } + + +def test_apply_llm_retrieval_keywords_uses_llm_and_cache(monkeypatch) -> None: + calls = [] + + def fake_call(prompt: str) -> dict: + calls.append(prompt) + return { + "status": "ok", + "parsed_content": {"retrieval_keywords": ["PTSD", "veteran cohort", "trauma", "ICD-10"]}, + } + + monkeypatch.setattr(builder, "_call_keyword_llm", fake_call) + cache = {} + row = _sample_row() + + builder._apply_llm_retrieval_keywords(row, cache, enabled=True, max_terms=6) + + assert row["retrieval_keywords_source"] == "llm" + assert row["retrieval_keywords"] == ["PTSD", "veteran cohort", "trauma", "ICD-10"] + assert len(cache) == 1 + assert len(calls) == 1 + + monkeypatch.setattr(builder, "_call_keyword_llm", lambda prompt: (_ for _ in ()).throw(AssertionError("should use cache"))) + cached_row = _sample_row() + builder._apply_llm_retrieval_keywords(cached_row, cache, enabled=True, max_terms=6) + + assert cached_row["retrieval_keywords_source"] == "llm_cached" + assert cached_row["retrieval_keywords"] == ["PTSD", "veteran cohort", "trauma", "ICD-10"] + + + +def test_apply_llm_retrieval_keywords_falls_back_on_invalid_llm(monkeypatch) -> None: + monkeypatch.setattr( + builder, + "_call_keyword_llm", + lambda prompt: {"status": "schema_mismatch", "parsed_content": {}}, + ) + cache = {} + row = _sample_row() + fallback = list(row["retrieval_keywords"]) + + 
builder._apply_llm_retrieval_keywords(row, cache, enabled=True, max_terms=6) + + assert row["retrieval_keywords_source"] == "heuristic" + assert row["retrieval_keywords"] == fallback + assert cache == {} + + +def test_apply_llm_retrieval_keywords_returns_cache_entry(monkeypatch) -> None: + monkeypatch.setattr( + builder, + "_call_keyword_llm", + lambda prompt: { + "status": "ok", + "parsed_content": {"retrieval_keywords": ["PTSD", "trauma cohort"]}, + }, + ) + cache = {} + row = _sample_row() + + entry = builder._apply_llm_retrieval_keywords(row, cache, enabled=True, max_terms=6) + + assert entry is not None + assert entry["phenotype_id"] == "cipher:test-1" + assert entry["cache_key"].startswith("cipher:test-1:") + assert entry["retrieval_keywords"] == ["PTSD", "trauma cohort"] + + + +def test_apply_llm_recommendation_metadata_uses_llm_and_cache(monkeypatch) -> None: + calls = [] + + def fake_call(prompt: str) -> dict: + calls.append(prompt) + return { + "status": "ok", + "parsed_content": { + "primary_clinical_topic": "Post-traumatic stress disorder", + "secondary_topics": ["trauma"], + "phenotype_role": "diagnosis", + "care_setting_scope": "outpatient", + "population_scope": "veterans", + "topic_mentions": { + "primary_topics": ["post-traumatic stress disorder"], + "context_only_topics": [], + "downstream_or_related_topics": ["trauma"], + }, + "target_vs_context_conditions": { + "target_conditions": ["post-traumatic stress disorder"], + "context_conditions": [], + }, + "exclude_from_primary_topic_match": ["trauma study context"], + "recommendation_summary": "PTSD diagnosis phenotype for veterans.", + }, + } + + monkeypatch.setattr(builder, "_call_recommendation_metadata_llm", fake_call) + cache = {} + row = _sample_row() + + entry = builder._apply_llm_recommendation_metadata(row, cache, enabled=True) + + assert row["recommendation_metadata_source"] == "llm" + assert row["phenotype_role"] == "diagnosis" + assert row["care_setting_scope"] == "outpatient" + assert 
row["primary_clinical_topic"] == "Post-traumatic stress disorder" + assert row["recommendation_summary"] == "PTSD diagnosis phenotype for veterans." + assert entry is not None + assert entry["cache_key"].startswith("recommendation:cipher:test-1:") + assert len(calls) == 1 + + monkeypatch.setattr(builder, "_call_recommendation_metadata_llm", lambda prompt: (_ for _ in ()).throw(AssertionError("should use cache"))) + cached_row = _sample_row() + builder._apply_llm_recommendation_metadata(cached_row, cache, enabled=True) + + assert cached_row["recommendation_metadata_source"] == "llm_cached" + assert cached_row["phenotype_role"] == "diagnosis" + assert cached_row["care_setting_scope"] == "outpatient" + + +def test_jsonl_cache_append_and_load_round_trip(tmp_path) -> None: + cache_path = tmp_path / "keyword_cache.jsonl" + entry_a = { + "cache_key": "cipher:test-1:abc", + "phenotype_id": "cipher:test-1", + "retrieval_keywords": ["PTSD", "trauma cohort"], + } + entry_b = { + "cache_key": "ohdsi:2:def", + "phenotype_id": "ohdsi:2", + "retrieval_keywords": ["COVID-19", "SARS-CoV-2"], + } + + builder._append_jsonl_cache_entry(str(cache_path), entry_a) + builder._append_jsonl_cache_entry(str(cache_path), entry_b) + + loaded = builder._load_jsonl_cache(str(cache_path)) + + assert loaded["cipher:test-1:abc"]["retrieval_keywords"] == ["PTSD", "trauma cohort"] + assert loaded["ohdsi:2:def"]["retrieval_keywords"] == ["COVID-19", "SARS-CoV-2"] + + +def test_build_keyword_prompt_uses_prompt_bundle_files() -> None: + prompt = builder._build_keyword_prompt( + { + "phenotype_id": "cipher:test-1", + "name": "Post-traumatic stress disorder", + }, + max_terms=8, + ) + + assert "Task: `phenotype_index_keywords`." 
in prompt + assert "Output contract:" in prompt + assert '"retrieval_keywords"' in prompt + assert '"max_terms": 8' in prompt + + +def test_main_dense_only_reuses_existing_catalog(monkeypatch, tmp_path) -> None: + out_dir = tmp_path / "index" + out_dir.mkdir() + row = { + "phenotype_id": "cipher:test-1", + "name": "PTSD phenotype", + "retrieval_text": "ptsd veteran trauma", + } + (out_dir / "catalog.jsonl").write_text(json.dumps(row) + "\n", encoding="utf-8") + (out_dir / "meta.json").write_text(json.dumps({"catalog_count": 1, "source_counts": {"va_cipher": 1}}), encoding="utf-8") + + def fake_build_dense_index(catalog, output_path, embed_client, cache_path, batch_size=64, require_dense=False): + Path(output_path).write_text("dense-placeholder", encoding="utf-8") + Path(cache_path).write_bytes(b"cache") + return {"status": "ok", "dim": 8, "count": len(catalog)} + + monkeypatch.setattr(builder, "_build_dense_index", fake_build_dense_index) + monkeypatch.setattr( + "sys.argv", + [ + "build_phenotype_index.py", + "--output-dir", + str(out_dir), + "--build-dense", + "--dense-only", + ], + ) + + rc = builder.main() + + assert rc == 0 + meta = json.loads((out_dir / "meta.json").read_text(encoding="utf-8")) + assert meta["dense"] == {"status": "ok", "dim": 8, "count": 1} + assert (out_dir / "dense.index").exists() + catalog_text = (out_dir / "catalog.jsonl").read_text(encoding="utf-8") + assert "text_for_embedding_hash" not in catalog_text or "text_for_embedding" not in catalog_text or isinstance(catalog_text, str) diff --git a/tests/test_case_causal_review_tools.py b/tests/test_case_causal_review_tools.py index 5706018..1e857e1 100644 --- a/tests/test_case_causal_review_tools.py +++ b/tests/test_case_causal_review_tools.py @@ -93,6 +93,72 @@ def test_case_causal_review_sanitize_row_supports_candidate_and_context_items() assert sanitized["tool_hints"]["prefetch_expansions"] == ["get_case_review_drug_signal_details"] +@pytest.mark.mcp +def 
test_case_causal_review_sanitize_row_preserves_controlled_identifier_fields() -> None: + tools = _registered_tools() + payload = tools["case_causal_review_sanitize_row"]( + { + "case_id": "case-1", + "case_summary": "Bleeding event after anticoagulant exposure.", + "index_event": { + "domain": "index_event", + "label": "Gastrointestinal bleeding", + "source_record_id": "reaction-1", + "annotations": {"adverse_event_meddra_id": "10075534", "adverse_event_concept_id": 321}, + }, + "candidate_items": [ + { + "domain": "Drug Exposures", + "label": "Warfarin", + "source_record_id": "drug-1", + "source_kind": "reported_drug", + "subrole": "primary_suspect", + "annotations": {"ingred_rxcui": "36567", "ingredient_concept_id": 123}, + } + ], + "context_items": [], + }, + ["drug_exposures"], + ) + annotations = payload["sanitized_row"]["candidate_items"][0]["annotations"] + index_annotations = payload["sanitized_row"]["index_event"]["annotations"] + assert annotations["ingred_rxcui"] == "36567" + assert annotations["ingredient_concept_id"] == 123 + assert index_annotations["adverse_event_meddra_id"] == "10075534" + assert index_annotations["adverse_event_concept_id"] == 321 + + +@pytest.mark.mcp +def test_case_causal_review_sanitize_row_redacts_zip_in_free_text() -> None: + tools = _registered_tools() + payload = tools["case_causal_review_sanitize_row"]( + { + "case_id": "case-1", + "case_summary": "Patient reported event near ZIP 15213 after exposure.", + "index_event": { + "domain": "index_event", + "label": "Gastrointestinal bleeding", + "source_record_id": "reaction-1", + }, + "candidate_items": [ + { + "domain": "Drug Exposures", + "label": "Warfarin", + "source_record_id": "drug-1", + "source_kind": "reported_drug", + "subrole": "primary_suspect", + "why_observed": "Filled in 15213 and later implicated", + } + ], + "context_items": [], + }, + ["drug_exposures"], + ) + sanitized = payload["sanitized_row"] + assert "[REDACTED_ZIP]" in sanitized["case_summary"] + assert 
"[REDACTED_ZIP]" in sanitized["candidate_items"][0]["why_observed"] + + @pytest.mark.mcp def test_case_causal_review_sanitize_row_rejects_phi() -> None: tools = _registered_tools() @@ -115,6 +181,28 @@ def test_case_causal_review_sanitize_row_rejects_phi() -> None: assert payload["error"] == "unsafe_case_row" +@pytest.mark.mcp +def test_case_causal_review_sanitize_row_rejects_person_id_phi_key() -> None: + tools = _registered_tools() + payload = tools["case_causal_review_sanitize_row"]( + { + "case_id": "case-2", + "case_summary": "Unsafe payload", + "index_event": { + "domain": "index_event", + "label": "Cystitis", + "source_record_id": "reaction-4", + }, + "candidate_items": [ + {"domain": "drug_exposures", "label": "Ketamine", "source_record_id": "drug-1"} + ], + "case_metadata": {"person_id": "12345"}, + }, + [], + ) + assert payload["error"] == "unsafe_case_row" + + @pytest.mark.mcp def test_case_causal_review_build_prompt_keeps_candidate_and_context_items_distinct() -> None: tools = _registered_tools() diff --git a/tests/test_cohort_methods_generated_scripts.py b/tests/test_cohort_methods_generated_scripts.py new file mode 100644 index 0000000..1bc2edb --- /dev/null +++ b/tests/test_cohort_methods_generated_scripts.py @@ -0,0 +1,201 @@ +from pathlib import Path +import shutil +import subprocess + +import pytest + + +SOURCE = Path("R/OHDSIAssistant/R/strategus_cohort_methods_shell.R") +EXECUTION_SETTINGS_SOURCE = Path("R/OHDSIAssistant/R/execution_settings.R") + + +def _generated_script_block(source: str, script_name: str, filename: str) -> str: + start = source.index(f"{script_name} <- c(") + end = source.index(f'write_lines(file.path(scripts_dir, "{filename}")', start) + return source[start:end] + + +def _run_r_or_skip(expression: str) -> subprocess.CompletedProcess[str]: + if shutil.which("Rscript") is None: + pytest.skip("Rscript is not available") + result = subprocess.run( + ["Rscript", "-e", expression], + check=False, + text=True, + 
capture_output=True, + ) + if result.returncode == 42: + pytest.skip(result.stderr.strip() or result.stdout.strip() or "required R package is not available") + return result + + +def test_generated_cm_spec_builds_and_executes_strategus_analysis_specification() -> None: + source = SOURCE.read_text(encoding="utf-8") + block = _generated_script_block(source, "script_06", "06_cm_spec.R") + + assert "analysisSpecification.json" in block + assert "CharacterizationModule$new()" in block + assert "CohortIncidenceModule$new()" in block + assert "CohortMethodModule$new()" in block + assert "CohortGeneratorModule$new()" in block + assert "CohortDiagnosticsModule$new()" not in block + assert "cohortGeneratorModuleSpecifications" not in block + assert "cohortDiagnosticsModuleSpecifications" not in block + assert "target_id <- as.numeric(" in block + assert "outcome_ids <- vapply(" in block + assert "numeric(1)" in block + assert "outcomeIds = as.numeric(outcome_ids)" in block + assert "outcomeWashoutDays = as.numeric(" in block + assert "maxCohortSize = studyPopulationDefaults$maxCohortSize" in block + assert "createStudyPopulationArgs <- CohortMethod::createCreateStudyPopulationArgs(" in block + assert "removeSubjectsWithPriorOutcome = studyPopulationDefaults$removeSubjectsWithPriorOutcome" in block + assert "useRegularization =" not in block + assert "prior = outcomeModelPrior" in block + assert "CohortMethod::createCmAnalysesSpecifications(" in block + assert "cmAnalysesSpecifications = cmAnalysesSpecifications$toList()" in block + assert "ParallelLogger::saveSettingsToJson(analysisSpecifications, analysis_spec_path)" in block + assert "result <- Strategus::execute(" in block + assert "connectionDetails <- OHDSIAssistant::createStrategusConnectionDetails(path = db_details_path)" in block + assert "exec <- OHDSIAssistant::createStrategusExecutionSettings(path = execution_settings_path)" in block + assert "CohortMethod::runCmAnalyses(" not in block + assert 
"CohortMethod::loadCmAnalysisList(" not in block + assert "CohortMethod::loadTargetComparatorOutcomesList(" not in block + + +def test_cm_runner_is_merged_into_script_06() -> None: + source = SOURCE.read_text(encoding="utf-8") + + assert "script_07 <- c(" not in source + assert 'write_lines(file.path(scripts_dir, "07_cm_run_analyses.R")' not in source + assert 'cat(" - 07_cm_run_analyses.R\\n")' not in source + + +def test_characterization_spec_accepts_generated_numeric_types() -> None: + result = _run_r_or_skip( + """ + if (!requireNamespace('Strategus', quietly = TRUE)) quit(status = 42) + library(Strategus) + module <- CharacterizationModule$new() + spec <- module$createModuleSpecifications( + targetIds = as.numeric(c(1, 2)), + outcomeIds = as.numeric(c(3)), + limitToFirstInNDays = as.numeric(c(99999, 99999)), + minPriorObservation = as.numeric(365), + outcomeWashoutDays = as.numeric(c(99999)), + riskWindowStart = as.numeric(0), + startAnchor = 'cohort start', + riskWindowEnd = as.numeric(0), + endAnchor = 'cohort end', + mode = 'CohortIncidence' + ) + stopifnot(identical(spec$module, 'CharacterizationModule')) + """ + ) + assert result.returncode == 0, result.stderr + + +def test_execution_settings_falls_back_when_max_cores_is_na() -> None: + result = _run_r_or_skip( + f""" + if (!requireNamespace('Strategus', quietly = TRUE) || + !requireNamespace('CohortGenerator', quietly = TRUE)) quit(status = 42) + library(Strategus) + library(CohortGenerator) + source('{EXECUTION_SETTINGS_SOURCE.as_posix()}') + exec <- createStrategusExecutionSettings(settings = list( + cdmDatabaseSchema = 'cdm', + workDatabaseSchema = 'work', + resultsDatabaseSchema = 'results', + vocabularyDatabaseSchema = 'vocab', + cohortTable = 'cohort', + workFolder = tempdir(), + resultsFolder = tempdir(), + maxCores = NA + )) + stopifnot(identical(exec$maxCores, 1L)) + stopifnot(exec$executionSettings$maxCores == 1) + """ + ) + assert result.returncode == 0, result.stderr + + +def 
test_cohort_method_spec_accepts_generated_argument_shape() -> None: + result = _run_r_or_skip( + """ + if (!requireNamespace('CohortMethod', quietly = TRUE) || + !requireNamespace('FeatureExtraction', quietly = TRUE) || + !requireNamespace('Cyclops', quietly = TRUE)) quit(status = 42) + library(CohortMethod) + target_id <- as.numeric(1) + comparator_id <- as.numeric(2) + outcome_ids <- as.numeric(3) + outcomes <- lapply(outcome_ids, function(outcome_id) { + CohortMethod::createOutcome( + outcomeId = outcome_id, + outcomeOfInterest = TRUE, + priorOutcomeLookback = 99999, + riskWindowStart = 0, + startAnchor = 'cohort start', + riskWindowEnd = 0, + endAnchor = 'cohort end' + ) + }) + targetComparatorOutcomesList <- list( + CohortMethod::createTargetComparatorOutcomes( + targetId = target_id, + comparatorId = comparator_id, + outcomes = outcomes, + excludedCovariateConceptIds = numeric(0), + includedCovariateConceptIds = numeric(0) + ) + ) + getDbArgs <- CohortMethod::createGetDbCohortMethodDataArgs( + removeDuplicateSubjects = 'keep first, truncate to second', + firstExposureOnly = TRUE, + washoutPeriod = 365, + restrictToCommonPeriod = TRUE, + studyStartDate = '', + studyEndDate = '', + maxCohortSize = 0, + covariateSettings = FeatureExtraction::createDefaultCovariateSettings() + ) + studyPopulationArgs <- CohortMethod::createCreateStudyPopulationArgs( + removeSubjectsWithPriorOutcome = TRUE, + priorOutcomeLookback = 99999, + minDaysAtRisk = 1, + riskWindowStart = 0, + startAnchor = 'cohort start', + riskWindowEnd = 0, + endAnchor = 'cohort end', + censorAtNewRiskWindow = FALSE + ) + outcomeModelPrior <- Cyclops::createPrior(priorType = 'laplace', useCrossValidation = TRUE) + fitOutcomeModelArgs <- CohortMethod::createFitOutcomeModelArgs( + modelType = 'cox', + stratified = FALSE, + useCovariates = FALSE, + inversePtWeighting = FALSE, + prior = outcomeModelPrior + ) + cmAnalysisList <- list( + CohortMethod::createCmAnalysis( + analysisId = 1, + description = 'test', 
+ getDbCohortMethodDataArgs = getDbArgs, + createStudyPopulationArgs = studyPopulationArgs, + createPsArgs = NULL, + trimByPsArgs = NULL, + matchOnPsArgs = NULL, + stratifyByPsArgs = NULL, + fitOutcomeModelArgs = fitOutcomeModelArgs + ) + ) + spec <- CohortMethod::createCmAnalysesSpecifications( + cmAnalysisList = cmAnalysisList, + targetComparatorOutcomesList = targetComparatorOutcomesList, + cmDiagnosticThresholds = CohortMethod::createCmDiagnosticThresholds() + ) + stopifnot(length(spec) > 0) + """ + ) + assert result.returncode == 0, result.stderr diff --git a/tests/test_cohort_methods_prompt_bundle.py b/tests/test_cohort_methods_prompt_bundle.py new file mode 100644 index 0000000..008bd75 --- /dev/null +++ b/tests/test_cohort_methods_prompt_bundle.py @@ -0,0 +1,95 @@ +import pytest +from pathlib import Path + +from study_agent_mcp.tools import cohort_methods_prompt_bundle +from study_agent_core.cohort_methods_spec_validation import COHORT_METHODS_SPEC_TOP_LEVEL_KEYS, validate_cohort_methods_spec + + +class DummyMCP: + def __init__(self) -> None: + self.tools: dict = {} + + def tool(self, name: str): + def decorator(fn): + self.tools[name] = fn + return fn + + return decorator + + +PROMPT_DIR = Path("mcp_server/prompts/cohort_methods") + + +@pytest.mark.mcp +def test_bundle_returns_expected_keys() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + fn = mcp.tools["cohort_methods_prompt_bundle"] + payload = fn() + assert "instruction_template" in payload + assert "output_style_template" in payload + assert "annotated_template" in payload + assert "analysis_specifications_template" in payload + assert "json_field_descriptions" in payload + assert "defaults_spec" in payload + assert payload["schema_version"] == "v1.4.0" + + +@pytest.mark.mcp +def test_instruction_and_output_style_load_from_prompt_files() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + fn = mcp.tools["cohort_methods_prompt_bundle"] + payload = fn() 
+ assert payload["instruction_template"] == (PROMPT_DIR / "instruction_cohort_methods_specs.md").read_text(encoding="utf-8").strip() + assert payload["output_style_template"] == (PROMPT_DIR / "output_style_cohort_methods_specs.md").read_text(encoding="utf-8").strip() + assert "" in payload["instruction_template"] + assert "" in payload["output_style_template"] + + +@pytest.mark.mcp +def test_analysis_template_loads_cm_analysis_template() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + fn = mcp.tools["cohort_methods_prompt_bundle"] + payload = fn() + assert "customAtlasTemplate" not in payload["analysis_specifications_template"] + assert "/*" not in payload["analysis_specifications_template"] + assert "fitOutcomeModelArgs" in payload["analysis_specifications_template"] + assert payload["annotated_template"] == payload["analysis_specifications_template"] + + +@pytest.mark.mcp +def test_json_field_descriptions_start_at_top_level_shape() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + fn = mcp.tools["cohort_methods_prompt_bundle"] + payload = fn() + descriptions = payload["json_field_descriptions"] + assert descriptions.startswith("## Top-Level Shape") + assert "temporary StudyAgent-specific contract" not in descriptions + assert "fitOutcomeModelArgs" in descriptions + + +@pytest.mark.mcp +def test_defaults_spec_is_cm_analysis_template_json() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + fn = mcp.tools["cohort_methods_prompt_bundle"] + payload = fn() + defaults = payload["defaults_spec"] + assert isinstance(defaults, dict) + for key in COHORT_METHODS_SPEC_TOP_LEVEL_KEYS: + assert key in defaults, f"missing key in defaults_spec: {key}" + ok, missing = validate_cohort_methods_spec(defaults) + assert ok is True, f"defaults_spec failed top-level validation: {missing}" + + +@pytest.mark.mcp +def test_bundle_is_cached() -> None: + mcp = DummyMCP() + cohort_methods_prompt_bundle.register(mcp) + 
fn = mcp.tools["cohort_methods_prompt_bundle"] + a = fn() + b = fn() + assert a is b # same object identity means cached diff --git a/tests/test_cohort_methods_spec_validation.py b/tests/test_cohort_methods_spec_validation.py new file mode 100644 index 0000000..210f14c --- /dev/null +++ b/tests/test_cohort_methods_spec_validation.py @@ -0,0 +1,285 @@ +import pytest + +from study_agent_core.cohort_methods_spec_validation import ( + LLM_FILLED_SECTIONS, + COHORT_METHODS_SPEC_TOP_LEVEL_KEYS, + validate_section, + validate_cohort_methods_spec, +) + + +pytestmark = pytest.mark.core + + +def _minimal_valid_spec() -> dict: + return { + "description": "Study", + "getDbCohortMethodDataArgs": { + "studyStartDate": "", + "studyEndDate": "", + "firstExposureOnly": False, + "removeDuplicateSubjects": "keep all", + "restrictToCommonPeriod": False, + "washoutPeriod": 365, + "maxCohortSize": 0, + }, + "createStudyPopArgs": { + "removeSubjectsWithPriorOutcome": True, + "priorOutcomeLookback": 99999, + "minDaysAtRisk": 1, + "riskWindowStart": 1, + "startAnchor": "cohort start", + "riskWindowEnd": 0, + "endAnchor": "cohort end", + "censorAtNewRiskWindow": False, + }, + "trimByPsArgs": {"trimFraction": 0.05, "equipoiseBounds": None}, + "matchOnPsArgs": {"maxRatio": 1, "caliper": 0.2, "caliperScale": "standardized logit"}, + "stratifyByPsArgs": None, + "createPsArgs": { + "maxCohortSizeForFitting": 250000, + "errorOnHighCorrelation": True, + "prior": None, + "control": None, + }, + "fitOutcomeModelArgs": { + "modelType": "cox", + "stratified": False, + "useCovariates": False, + "inversePtWeighting": False, + "prior": None, + "control": None, + }, + } + + +def test_top_level_constants() -> None: + assert LLM_FILLED_SECTIONS == [ + "getDbCohortMethodDataArgs", + "createStudyPopArgs", + "propensityScoreAdjustment", + "fitOutcomeModelArgs", + ] + assert "description" in COHORT_METHODS_SPEC_TOP_LEVEL_KEYS + + +def test_validate_cohort_methods_spec_accepts_minimal() -> None: + ok, missing = 
validate_cohort_methods_spec(_minimal_valid_spec()) + assert ok is True + assert missing == [] + + +def test_validate_cohort_methods_spec_reports_missing_keys() -> None: + spec = _minimal_valid_spec() + del spec["fitOutcomeModelArgs"] + del spec["description"] + ok, missing = validate_cohort_methods_spec(spec) + assert ok is False + assert set(missing) == {"description", "fitOutcomeModelArgs"} + + +def test_validate_section_accepts_good_study_pop() -> None: + spec = _minimal_valid_spec() + ok, violations = validate_section("createStudyPopArgs", spec["createStudyPopArgs"]) + assert ok is True + assert violations == [] + + +def test_validate_section_flags_bad_enum() -> None: + bad = { + "modelType": "svm", + "stratified": False, + "useCovariates": False, + "inversePtWeighting": False, + "prior": None, + "control": None, + } + ok, violations = validate_section("fitOutcomeModelArgs", bad) + assert ok is False + assert any("modelType" in v for v in violations) + + +def test_validate_section_flags_range() -> None: + bad = { + "matchOnPsArgs": { + "maxRatio": -1, + "caliper": -0.5, + "caliperScale": "standardized", + } + } + ok, violations = validate_section("propensityScoreAdjustment", bad) + assert ok is False + assert any("caliper" in v for v in violations) + assert any("maxRatio" in v for v in violations) + + +def test_validate_section_rejects_unknown_section() -> None: + ok, violations = validate_section("unknownSection", {}) + assert ok is False + assert violations and "unknown section" in violations[0] + + +from study_agent_core.cohort_methods_spec_validation import ( + backfill_section_from_defaults, + merge_client_metadata, +) + + +def test_merge_client_metadata_overrides_llm_cohorts() -> None: + spec = _minimal_valid_spec() + client_cohort_defs = { + "targetCohort": {"id": 1, "name": "Real Target"}, + "comparatorCohort": {"id": 2, "name": "Real Comp"}, + "outcomeCohort": [{"id": 3, "name": "Real Outcome"}], + } + merged = merge_client_metadata( + spec, + 
cohort_definitions=client_cohort_defs, + negative_control={"id": 42, "name": "NC"}, + covariate_selection={"conceptsToInclude": [{"id": 7}], "conceptsToExclude": []}, + ) + assert merged["cohortDefinitions"]["targetCohort"]["id"] == 1 + assert merged["cohortDefinitions"]["targetCohort"]["name"] == "Real Target" + assert merged["cohortDefinitions"]["comparatorCohort"]["id"] == 2 + assert merged["negativeControlConceptSet"]["id"] == 42 + assert merged["covariateSelection"]["conceptsToInclude"] == [{"id": 7}] + + +def test_merge_client_metadata_leaves_name_alone() -> None: + spec = _minimal_valid_spec() + spec["description"] = "LLM-supplied study name" + merged = merge_client_metadata( + spec, + cohort_definitions={}, + negative_control={}, + covariate_selection={}, + ) + assert merged["description"] == "LLM-supplied study name" + + +def test_merge_client_metadata_does_not_mutate_input() -> None: + spec = _minimal_valid_spec() + merge_client_metadata( + spec, + cohort_definitions={"targetCohort": {"id": 42, "name": "X"}}, + negative_control={}, + covariate_selection={}, + ) + assert "cohortDefinitions" not in spec + + +def test_backfill_section_from_defaults_replaces_single_section() -> None: + spec = _minimal_valid_spec() + defaults = _minimal_valid_spec() + defaults["fitOutcomeModelArgs"] = {"modelType": "cox", "stratified": True, "useCovariates": False, "inversePtWeighting": False, "prior": None, "control": None} + spec["fitOutcomeModelArgs"] = {"modelType": "BROKEN"} + out = backfill_section_from_defaults(spec, defaults, "fitOutcomeModelArgs") + assert out["fitOutcomeModelArgs"]["modelType"] == "cox" + assert out["fitOutcomeModelArgs"]["stratified"] is True + assert out["createStudyPopArgs"] == spec["createStudyPopArgs"] # other sections untouched + + +def test_backfill_section_rejects_unknown_section() -> None: + spec = _minimal_valid_spec() + defaults = _minimal_valid_spec() + with pytest.raises(ValueError): + backfill_section_from_defaults(spec, defaults, 
"unknownSection") + + +from study_agent_core.cohort_methods_spec_validation import cohort_methods_spec_to_shell_recommendation + + +def _full_spec_with_tar() -> dict: + spec = _minimal_valid_spec() + spec["createStudyPopArgs"]["washoutPeriod"] = 365 + spec["createStudyPopArgs"]["startAnchor"] = "cohort start" + spec["createStudyPopArgs"]["riskWindowStart"] = 1 + spec["createStudyPopArgs"]["endAnchor"] = "cohort end" + spec["createStudyPopArgs"]["riskWindowEnd"] = 365 + return spec + + +def test_cohort_methods_spec_to_shell_separates_tar_keys() -> None: + spec = _full_spec_with_tar() + out = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec=spec, + raw_description="desc", + defaults_snapshot={"x": 1}, + profile_name="P", + input_method="typed_text", + rec_status="received", + ) + assert out["mode"] == "free_text" + assert out["source"] == "acp_flow" + assert out["status"] == "received" + assert out["profile_name"] == "P" + assert out["raw_description"] == "desc" + assert out["defaults_snapshot"] == {"x": 1} + tar = out["time_at_risk"] + assert tar["startAnchor"] == "cohort start" + assert tar["riskWindowStart"] == 1 + assert tar["endAnchor"] == "cohort end" + assert tar["riskWindowEnd"] == 365 + sp = out["study_population"] + assert "startAnchor" not in sp + assert "riskWindowStart" not in sp + assert sp["washoutPeriod"] == 365 + assert sp["cohortMethodDataArgs"] == spec["getDbCohortMethodDataArgs"] + assert out["propensity_score_adjustment"] == { + "trimByPsArgs": spec["trimByPsArgs"], + "matchOnPsArgs": spec["matchOnPsArgs"], + "stratifyByPsArgs": spec["stratifyByPsArgs"], + "createPsArgs": spec["createPsArgs"], + } + assert out["outcome_model"] == spec["fitOutcomeModelArgs"] + assert out["deferred_inputs"]["function_argument_description"] == "implemented" + + +def test_cohort_methods_spec_to_shell_honors_rec_status_backfilled() -> None: + out = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec=_minimal_valid_spec(), + 
raw_description="d", + defaults_snapshot={}, + profile_name="X", + input_method="description_argument", + rec_status="backfilled", + ) + assert out["status"] == "backfilled" + assert out["input_method"] == "description_argument" + + +def test_cohort_methods_spec_to_shell_handles_missing_sections() -> None: + out = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec={}, + raw_description="d", + defaults_snapshot={}, + profile_name="X", + input_method="typed_text", + rec_status="received", + ) + assert out["study_population"] == {} + assert out["time_at_risk"] == {} + assert out["propensity_score_adjustment"] == { + "trimByPsArgs": None, + "matchOnPsArgs": None, + "stratifyByPsArgs": None, + "createPsArgs": None, + } + assert out["outcome_model"] == {} + + +def test_cohort_methods_spec_to_shell_does_not_mutate_input() -> None: + spec = _full_spec_with_tar() + snapshot = {"profile_name": "snap"} + out = cohort_methods_spec_to_shell_recommendation( + cohort_methods_spec=spec, + raw_description="d", + defaults_snapshot=snapshot, + profile_name="X", + input_method="typed_text", + rec_status="received", + ) + out["study_population"]["washoutPeriod"] = 9999 + out["defaults_snapshot"]["profile_name"] = "mutated" + assert spec["createStudyPopArgs"]["washoutPeriod"] == 365 + assert snapshot["profile_name"] == "snap" diff --git a/tests/test_cohort_methods_specs_models.py b/tests/test_cohort_methods_specs_models.py new file mode 100644 index 0000000..5102308 --- /dev/null +++ b/tests/test_cohort_methods_specs_models.py @@ -0,0 +1,55 @@ +import pytest + +from study_agent_core.models import ( + CohortMethodSpecsRecommendationInput, + CohortMethodSpecsRecommendationOutput, +) + + +pytestmark = pytest.mark.core + + +def test_input_requires_description() -> None: + with pytest.raises(Exception): + CohortMethodSpecsRecommendationInput() # type: ignore[call-arg] + + +def test_input_accepts_minimal_payload() -> None: + payload = CohortMethodSpecsRecommendationInput( + 
analytic_settings_description="compare A vs B", + ) + assert payload.analytic_settings_description == "compare A vs B" + assert payload.study_intent == "" + assert payload.study_description is None + + +def test_input_accepts_full_wrapper_body() -> None: + payload = CohortMethodSpecsRecommendationInput( + analytic_settings_description="365-day washout, 1:1 PS match, Cox", + study_description="365-day washout, 1:1 PS match, Cox", + study_intent="CV outcomes comparative effectiveness", + ) + assert payload.study_intent == "CV outcomes comparative effectiveness" + assert payload.study_description == "365-day washout, 1:1 PS match, Cox" + + +def test_input_rejects_cohort_metadata_fields() -> None: + with pytest.raises(Exception): + CohortMethodSpecsRecommendationInput( + analytic_settings_description="365-day washout", + target_cohort_id=1001, + ) # type: ignore[call-arg] + + +def test_output_defaults() -> None: + out = CohortMethodSpecsRecommendationOutput(status="ok") + assert out.status == "ok" + assert out.recommendation == {} + assert out.cohort_methods_specifications is None + assert out.section_rationales == {} + assert out.diagnostics == {} + + +def test_output_rejects_unknown_status() -> None: + with pytest.raises(Exception): + CohortMethodSpecsRecommendationOutput(status="stub") # type: ignore[arg-type] diff --git a/tests/test_core_tools.py b/tests/test_core_tools.py index ee1dd52..6d6a065 100644 --- a/tests/test_core_tools.py +++ b/tests/test_core_tools.py @@ -1,9 +1,11 @@ import pytest from study_agent_core.tools import ( + cohort_methods_intent_split, cohort_lint, phenotype_intent_split, phenotype_improvements, + phenotype_recommendation_plan, phenotype_recommendations, phenotype_validation_review, propose_concept_set_diff, @@ -40,11 +42,75 @@ def test_cohort_lint_washout_and_inverted(): assert "inverted_window_0" in ids +@pytest.mark.core +def test_phenotype_recommendation_plan_stub(): + catalog = [ + {"phenotype_id": "ohdsi:1", "phenotype_name": 
"Alpha"}, + {"phenotype_id": "cipher:2", "phenotype_name": "Beta"}, + ] + result = phenotype_recommendation_plan("protocol", catalog, max_shortlist=1) + assert result["mode"] == "stub" + assert result["shortlist_ids"] == ["ohdsi:1"] + + +@pytest.mark.core +def test_phenotype_recommendation_plan_llm_filters(): + catalog = [ + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha"}, + {"phenotype_id": "cipher:2", "phenotype_name": "Beta"}, + ] + llm = { + "plan": "plan", + "intent_facets": {"phenotype_role": "diagnosis"}, + "shortlist_ids": ["cipher:2", "missing:999", "ohdsi:1"], + "needs_more_search": False, + "reasoning_notes": ["note"], + } + result = phenotype_recommendation_plan("protocol", catalog, max_shortlist=2, llm_result=llm) + assert result["invalid_ids_filtered"] == ["missing:999"] + assert result["shortlist_ids"] == ["cipher:2", "ohdsi:1"] + + +@pytest.mark.core +def test_phenotype_recommendation_plan_maps_bare_ids_and_falls_back(): + catalog = [ + {"phenotype_id": "ohdsi:170", "phenotype_name": "Alpha"}, + {"phenotype_id": "cipher:17590", "phenotype_name": "Beta"}, + ] + llm = { + "plan": "plan", + "intent_facets": {"phenotype_role": "diagnosis"}, + "shortlist_ids": ["17590", "missing:999"], + "needs_more_search": False, + "reasoning_notes": ["note"], + } + result = phenotype_recommendation_plan("protocol", catalog, max_shortlist=2, llm_result=llm) + assert result["shortlist_ids"] == ["cipher:17590"] + assert result["invalid_ids_filtered"] == ["missing:999"] + + +@pytest.mark.core +def test_phenotype_recommendation_plan_coerces_string_reasoning_notes(): + catalog = [ + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha"}, + ] + llm = { + "plan": "plan", + "intent_facets": "not a dict", + "shortlist_ids": ["ohdsi:1"], + "needs_more_search": False, + "reasoning_notes": "single note", + } + result = phenotype_recommendation_plan("protocol", catalog, max_shortlist=1, llm_result=llm) + assert result["intent_facets"] == {} + assert result["reasoning_notes"] 
== ["single note"] + + @pytest.mark.core def test_phenotype_recommendations_stub(): catalog = [ - {"cohortId": 1, "cohortName": "Alpha"}, - {"cohortId": 2, "cohortName": "Beta"}, + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha"}, + {"phenotype_id": "cipher:2", "phenotype_name": "Beta"}, ] result = phenotype_recommendations("protocol", catalog, max_results=1) assert result["mode"] == "stub" @@ -54,20 +120,38 @@ def test_phenotype_recommendations_stub(): @pytest.mark.core def test_phenotype_recommendations_llm_filters(): catalog = [ - {"cohortId": 1, "cohortName": "Alpha"}, - {"cohortId": 2, "cohortName": "Beta"}, + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha"}, + {"phenotype_id": "cipher:2", "phenotype_name": "Beta"}, ] llm = { "phenotype_recommendations": [ - {"cohortId": 1, "cohortName": "Alpha", "justification": "ok"}, - {"cohortId": 999, "cohortName": "Nope"}, + {"phenotype_id": "ohdsi:1", "phenotype_name": "Alpha", "justification": "ok"}, + {"phenotype_id": "missing:999", "phenotype_name": "Nope"}, ] } result = phenotype_recommendations("protocol", catalog, max_results=2, llm_result=llm) - assert result["invalid_ids_filtered"] == [999] + assert result["invalid_ids_filtered"] == ["missing:999"] assert len(result["phenotype_recommendations"]) == 1 +@pytest.mark.core +def test_phenotype_recommendations_maps_bare_ids_and_falls_back(): + catalog = [ + {"phenotype_id": "ohdsi:181", "phenotype_name": "Alpha"}, + {"phenotype_id": "cipher:17590", "phenotype_name": "Beta"}, + ] + llm = { + "plan": "plan", + "phenotype_recommendations": [ + {"phenotype_id": "17590", "phenotype_name": "Beta", "justification": "ok"}, + {"phenotype_id": "missing:999", "phenotype_name": "Nope"}, + ] + } + result = phenotype_recommendations("protocol", catalog, max_results=2, llm_result=llm) + assert result["phenotype_recommendations"][0]["phenotype_id"] == "cipher:17590" + assert result["invalid_ids_filtered"] == ["missing:999"] + + @pytest.mark.core def 
test_phenotype_improvements_filters_targets(): cohorts = [{"id": 10}, {"id": 20}] @@ -121,3 +205,72 @@ def test_phenotype_intent_split_llm(): result = phenotype_intent_split("intent", llm_result=llm) assert result["target_statement"] == "Target" assert result["outcome_statement"] == "Outcome" + + +@pytest.mark.core +def test_cohort_methods_intent_split_llm(): + llm = { + "status": "ok", + "plan": "plan", + "target_statement": "Metformin users", + "comparator_statement": "Sulfonylurea users", + "outcome_statement": "GI bleeding", + "outcome_statements": ["GI bleeding", "Stroke"], + "rationale": "Because", + "questions": [], + } + result = cohort_methods_intent_split("intent", llm_result=llm) + assert result["status"] == "ok" + assert result["target_statement"] == "Metformin users" + assert result["comparator_statement"] == "Sulfonylurea users" + assert result["outcome_statement"] == "GI bleeding" + assert result["outcome_statements"] == ["GI bleeding", "Stroke"] + + +@pytest.mark.core +def test_cohort_methods_intent_split_backfills_outcome_statements(): + llm = { + "status": "ok", + "plan": "plan", + "target_statement": "Metformin users", + "comparator_statement": "Sulfonylurea users", + "outcome_statement": "GI bleeding", + "rationale": "Because", + "questions": [], + } + result = cohort_methods_intent_split("intent", llm_result=llm) + assert result["outcome_statement"] == "GI bleeding" + assert result["outcome_statements"] == ["GI bleeding"] + + +@pytest.mark.core +def test_cohort_methods_intent_split_requires_comparator_when_ok(): + llm = { + "status": "ok", + "plan": "plan", + "target_statement": "Metformin users", + "comparator_statement": "", + "outcome_statement": "GI bleeding", + "outcome_statements": ["GI bleeding"], + "rationale": "Because", + } + result = cohort_methods_intent_split("intent", llm_result=llm) + assert result["error"] == "invalid_cohort_methods_intent_split" + assert result["missing"] == ["comparator_statement"] + + +@pytest.mark.core +def 
test_cohort_methods_intent_split_allows_clarification(): + llm = { + "status": "needs_clarification", + "plan": "plan", + "target_statement": "", + "comparator_statement": "", + "outcome_statement": "", + "outcome_statements": [], + "rationale": "Intent is underspecified.", + "questions": ["What exposure should define the target cohort?"], + } + result = cohort_methods_intent_split("intent", llm_result=llm) + assert result["status"] == "needs_clarification" + assert result["questions"] == ["What exposure should define the target cohort?"] diff --git a/tests/test_keeper_interface_specs.py b/tests/test_keeper_interface_specs.py index 2005adc..15566de 100644 --- a/tests/test_keeper_interface_specs.py +++ b/tests/test_keeper_interface_specs.py @@ -87,3 +87,14 @@ def test_service_registry_declares_case_causal_review_flow() -> None: "get_case_review_drug_label_details", "get_case_review_report_literature_stub", ] + assert service["validation"]["controlled_identifier_keys"] == [ + "ingred_rxcui", + "rxcui", + "adverse_event_meddra_id", + "meddra_code", + "meddra_id", + "concept_id", + "ingredient_concept_id", + "adverse_event_concept_id", + "outcome_concept_id", + ] diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py index 1343061..8278cb3 100644 --- a/tests/test_llm_client.py +++ b/tests/test_llm_client.py @@ -29,7 +29,7 @@ def test_call_llm_chat_completions_success(monkeypatch): "content": json.dumps( { "plan": "ok", - "phenotype_recommendations": [{"cohortId": 1}], + "phenotype_recommendations": [{"phenotype_id": "ohdsi:1"}], } ) } @@ -143,3 +143,158 @@ def test_call_llm_missing_required_keys(monkeypatch): result = llm_client.call_llm("prompt", required_keys=["advice", "next_steps"]) assert result.status == "schema_mismatch" assert result.missing_keys == ["next_steps"] + + +@pytest.mark.acp +def test_call_llm_uses_later_json_object_matching_required_keys(monkeypatch): + schema_echo = { + "type": "object", + "properties": { + "status": {"type": "string"}, + 
"target_statement": {"type": "string"}, + }, + } + actual_output = { + "status": "ok", + "target_statement": "Metformin initiators.", + "comparator_statement": "Sulfonylurea initiators.", + "outcome_statement": "GI bleeding.", + "outcome_statements": ["GI bleeding.", "MACE."], + "rationale": "Comparative cohort method intent.", + } + payload = { + "choices": [ + { + "message": { + "content": f"{json.dumps(schema_echo)}\n{json.dumps(actual_output)}" + } + } + ] + } + + monkeypatch.setenv("LLM_API_KEY", "secret") + monkeypatch.setenv("LLM_USE_RESPONSES", "0") + monkeypatch.setattr( + llm_client.urllib.request, + "urlopen", + lambda request, timeout=0: _FakeResponse(json.dumps(payload)), + ) + + result = llm_client.call_llm( + "prompt", + required_keys=[ + "status", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + "rationale", + ], + ) + assert result.status == "ok" + assert result.parsed_content["target_statement"] == "Metformin initiators." + assert result.parsed_content["outcome_statements"] == ["GI bleeding.", "MACE."] + + +@pytest.mark.acp +def test_call_llm_recovers_schema_wrapped_properties_output(monkeypatch): + schema_wrapped_output = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "cohort_methods_intent_split_output", + "type": "object", + "properties": { + "status": "ok", + "plan": "Derived cohort statements.", + "target_statement": "Patients treated with Ticagrelor.", + "comparator_statement": "Patients treated with Clopidogrel.", + "outcome_statement": "Cardiovascular events.", + "outcome_statements": ["Myocardial infarction", "Stroke"], + "rationale": "Comparative antiplatelet study intent.", + "questions": [], + }, + "required": [ + "status", + "plan", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + "rationale", + ], + "additionalProperties": False, + } + payload = {"choices": [{"message": {"content": 
json.dumps(schema_wrapped_output)}}]} + + monkeypatch.setenv("LLM_API_KEY", "secret") + monkeypatch.setenv("LLM_USE_RESPONSES", "0") + monkeypatch.setattr( + llm_client.urllib.request, + "urlopen", + lambda request, timeout=0: _FakeResponse(json.dumps(payload)), + ) + + result = llm_client.call_llm( + "prompt", + required_keys=[ + "status", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + "rationale", + ], + ) + assert result.status == "ok" + assert result.schema_valid is True + assert result.parsed_content["status"] == "ok" + assert result.parsed_content["target_statement"] == "Patients treated with Ticagrelor." + assert result.parsed_content["outcome_statements"] == ["Myocardial infarction", "Stroke"] + + +@pytest.mark.acp +def test_call_llm_does_not_recover_plain_schema_echo(monkeypatch): + schema_echo = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "cohort_methods_intent_split_output", + "type": "object", + "properties": { + "status": {"type": "string", "enum": ["ok", "needs_clarification"]}, + "target_statement": {"type": "string"}, + "comparator_statement": {"type": "string"}, + "outcome_statement": {"type": "string"}, + "outcome_statements": {"type": "array", "items": {"type": "string"}}, + "rationale": {"type": "string"}, + }, + "required": [ + "status", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + "rationale", + ], + "additionalProperties": False, + } + payload = {"choices": [{"message": {"content": json.dumps(schema_echo)}}]} + + monkeypatch.setenv("LLM_API_KEY", "secret") + monkeypatch.setenv("LLM_USE_RESPONSES", "0") + monkeypatch.setattr( + llm_client.urllib.request, + "urlopen", + lambda request, timeout=0: _FakeResponse(json.dumps(payload)), + ) + + result = llm_client.call_llm( + "prompt", + required_keys=[ + "status", + "target_statement", + "comparator_statement", + "outcome_statement", + "outcome_statements", + 
"rationale", + ], + ) + assert result.status == "schema_mismatch" + assert result.schema_valid is False diff --git a/tests/test_mcp_phenotype_summary.py b/tests/test_mcp_phenotype_summary.py new file mode 100644 index 0000000..d7f6c91 --- /dev/null +++ b/tests/test_mcp_phenotype_summary.py @@ -0,0 +1,58 @@ +import json +from pathlib import Path + +import pytest + +from study_agent_mcp.retrieval.index import PhenotypeIndex + + +@pytest.mark.mcp +def test_fetch_summary_exposes_retrieval_fields(tmp_path) -> None: + index_dir = tmp_path / "index" + index_dir.mkdir() + catalog_path = index_dir / "catalog.jsonl" + row = { + "phenotype_id": "cipher:test-1", + "source_dataset": "va_cipher", + "source_record_type": "disease_phenotype", + "name": "Post-traumatic stress disorder", + "short_description": "PTSD phenotype", + "tags": ["General"], + "raw_keywords": ["veteran"], + "retrieval_keywords": ["PTSD", "trauma cohort"], + "retrieval_keywords_source": "llm_cached", + "retrieval_concept_labels": ["ICD-10 Diagnostic Codes", "PTSD"], + "methodology_summary": "Codes and narrative evidence for PTSD.", + "primary_clinical_topic": "Post-traumatic stress disorder", + "secondary_topics": ["trauma"], + "phenotype_role": "diagnosis", + "care_setting_scope": "outpatient", + "population_scope": "veterans", + "topic_mentions": {"primary_topics": ["post-traumatic stress disorder"], "context_only_topics": [], "downstream_or_related_topics": ["trauma"]}, + "target_vs_context_conditions": {"target_conditions": ["post-traumatic stress disorder"], "context_conditions": []}, + "exclude_from_primary_topic_match": ["trauma study context"], + "recommendation_summary": "PTSD diagnosis phenotype for veterans.", + "recommendation_metadata_source": "llm_cached", + "signals": ["source:cipher", "execution:codes_only"], + "ontology_keys": [], + "code_systems": [], + "executable_definition_status": "codes_only", + "execution_readiness_score": 0.45, + "adaptation_notes": "Requires translation to OHDSI 
logic.", + } + catalog_path.write_text(json.dumps(row) + "\n", encoding="utf-8") + (index_dir / "meta.json").write_text(json.dumps({"catalog_count": 1}), encoding="utf-8") + + idx = PhenotypeIndex(str(index_dir), allow_dense=False, allow_sparse=False).load() + summary = idx.fetch_summary("cipher:test-1") + + assert summary is not None + assert summary["raw_keywords"] == ["veteran"] + assert summary["retrieval_keywords"] == ["PTSD", "trauma cohort"] + assert summary["retrieval_keywords_source"] == "llm_cached" + assert summary["retrieval_concept_labels"] == ["ICD-10 Diagnostic Codes", "PTSD"] + assert summary["methodology_summary"] == "Codes and narrative evidence for PTSD." + assert summary["primary_clinical_topic"] == "Post-traumatic stress disorder" + assert summary["phenotype_role"] == "diagnosis" + assert summary["care_setting_scope"] == "outpatient" + assert summary["recommendation_metadata_source"] == "llm_cached" diff --git a/tests/test_mcp_prompt_bundle.py b/tests/test_mcp_prompt_bundle.py index 890c37d..90f8f3d 100644 --- a/tests/test_mcp_prompt_bundle.py +++ b/tests/test_mcp_prompt_bundle.py @@ -15,6 +15,30 @@ def decorator(fn): return decorator +@pytest.mark.mcp +def test_prompt_bundle_intent_facets_schema() -> None: + mcp = DummyMCP() + phenotype_prompt_bundle.register(mcp) + fn = mcp.tools["phenotype_prompt_bundle"] + payload = fn("phenotype_recommendation_intent_facets") + assert "overview" in payload + assert "spec" in payload + assert "output_schema" in payload + assert payload["output_schema"]["title"] == "phenotype_recommendation_intent_facets_output" + + +@pytest.mark.mcp +def test_prompt_bundle_plan_schema() -> None: + mcp = DummyMCP() + phenotype_prompt_bundle.register(mcp) + fn = mcp.tools["phenotype_prompt_bundle"] + payload = fn("phenotype_recommendation_plan") + assert "overview" in payload + assert "spec" in payload + assert "output_schema" in payload + assert payload["output_schema"]["title"] == "phenotype_recommendation_plan_output" + + 
@pytest.mark.mcp def test_prompt_bundle_tool_returns_schema() -> None: mcp = DummyMCP() @@ -27,6 +51,20 @@ def test_prompt_bundle_tool_returns_schema() -> None: assert payload["output_schema"]["title"] == "phenotype_recommendations_output" +@pytest.mark.mcp +def test_cohort_methods_intent_split_bundle_schema() -> None: + from study_agent_mcp.tools import cohort_methods_intent_split + + mcp = DummyMCP() + cohort_methods_intent_split.register(mcp) + fn = mcp.tools["cohort_methods_intent_split"] + payload = fn() + assert "overview" in payload + assert "spec" in payload + assert "output_schema" in payload + assert payload["output_schema"]["title"] == "cohort_methods_intent_split_output" + + @pytest.mark.mcp def test_prompt_bundle_improvements_schema() -> None: mcp = DummyMCP() diff --git a/tests/test_mcp_tools_registry.py b/tests/test_mcp_tools_registry.py index 3f633b4..dec44d0 100644 --- a/tests/test_mcp_tools_registry.py +++ b/tests/test_mcp_tools_registry.py @@ -22,6 +22,7 @@ def test_register_all_tools() -> None: assert set(mcp.registered) == { "propose_concept_set_diff", "cohort_lint", + "cohort_methods_intent_split", "phenotype_recommendations", "phenotype_improvements", "phenotype_intent_split", @@ -55,4 +56,5 @@ def test_register_all_tools() -> None: "vocab_remove_descendants", "vocab_add_nonchildren", "vocab_fetch_concepts", + "cohort_methods_prompt_bundle", } diff --git a/tests/test_retrieval_index.py b/tests/test_retrieval_index.py new file mode 100644 index 0000000..b725cb8 --- /dev/null +++ b/tests/test_retrieval_index.py @@ -0,0 +1,52 @@ +import pytest + +from study_agent_mcp.retrieval import PhenotypeIndex + + +@pytest.mark.mcp +def test_search_normalizes_dense_and_sparse_scores_before_weighting(monkeypatch): + index = PhenotypeIndex(index_dir="/tmp", allow_dense=False, allow_sparse=False) + index._catalog = [ + {"phenotype_id": "a", "name": "A", "short_description": "A", "tags": [], "signals": [], "executable_definition_status": "native_ohdsi", 
"execution_readiness_score": 1.0}, + {"phenotype_id": "b", "name": "B", "short_description": "B", "tags": [], "signals": [], "executable_definition_status": "native_ohdsi", "execution_readiness_score": 1.0}, + {"phenotype_id": "c", "name": "C", "short_description": "C", "tags": [], "signals": [], "executable_definition_status": "native_ohdsi", "execution_readiness_score": 1.0}, + ] + index._dense = object() + index.embedding_client = object() + index._sparse = {"enabled": True} + + monkeypatch.setattr(index, "_dense_search", lambda query, top_k: {0: 0.9, 1: 0.6}) + monkeypatch.setattr(index, "_sparse_search", lambda query, top_k: {1: 25.0, 2: 10.0}) + + results = index.search( + query="test", + top_k=3, + dense_weight=0.8, + sparse_weight=0.2, + ) + + assert [row["phenotype_id"] for row in results] == ["a", "b", "c"] + + by_id = {row["phenotype_id"]: row for row in results} + assert by_id["a"]["score_dense"] == pytest.approx(1.0) + assert by_id["a"]["score_sparse"] is None + assert by_id["a"]["score"] == pytest.approx(0.8) + + assert by_id["b"]["score_dense"] == pytest.approx(0.0) + assert by_id["b"]["score_sparse"] == pytest.approx(1.0) + assert by_id["b"]["score"] == pytest.approx(0.2) + + assert by_id["c"]["score_dense"] is None + assert by_id["c"]["score_sparse"] == pytest.approx(0.0) + assert by_id["c"]["score"] == pytest.approx(0.0) + + assert by_id["a"]["score_dense_raw"] == pytest.approx(0.9) + assert by_id["b"]["score_sparse_raw"] == pytest.approx(25.0) + + +@pytest.mark.mcp +def test_normalize_score_map_returns_one_for_flat_scores(): + from study_agent_mcp.retrieval.index import _normalize_score_map + + normalized = _normalize_score_map({1: 4.0, 2: 4.0}) + assert normalized == {1: 1.0, 2: 1.0} diff --git a/tests/test_service_registry_helper.py b/tests/test_service_registry_helper.py new file mode 100644 index 0000000..fa81cbd --- /dev/null +++ b/tests/test_service_registry_helper.py @@ -0,0 +1,41 @@ +import textwrap + +import pytest + +from 
study_agent_mcp.tools import _service_registry + + +@pytest.mark.mcp +def test_get_controlled_identifier_keys_reads_service_registry_override(tmp_path, monkeypatch) -> None: + registry = tmp_path / "services.yaml" + registry.write_text( + textwrap.dedent( + """ + services: + case_causal_review: + validation: + controlled_identifier_keys: + - ingred_rxcui + - adverse_event_meddra_id + """ + ).strip(), + encoding="utf-8", + ) + monkeypatch.setenv("STUDY_AGENT_SERVICE_REGISTRY", str(registry)) + _service_registry.load_service_registry.cache_clear() + + keys = _service_registry.get_controlled_identifier_keys("case_causal_review", {"fallback_key"}) + + assert keys == frozenset({"ingred_rxcui", "adverse_event_meddra_id"}) + + +@pytest.mark.mcp +def test_get_controlled_identifier_keys_falls_back_on_invalid_registry(tmp_path, monkeypatch) -> None: + registry = tmp_path / "services.yaml" + registry.write_text("services:\n case_causal_review:\n validation: []\n", encoding="utf-8") + monkeypatch.setenv("STUDY_AGENT_SERVICE_REGISTRY", str(registry)) + _service_registry.load_service_registry.cache_clear() + + keys = _service_registry.get_controlled_identifier_keys("case_causal_review", {"fallback_key"}) + + assert keys == frozenset({"fallback_key"})