Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e0e17d6
Added the ability of an ACP tool specification to white list certain …
rkboyce Apr 23, 2026
a9023cc
documentation changes and addition of fields for case reviews
rkboyce Apr 23, 2026
fd75c5d
created strategus_cohort_methods_shell.R, implemented scaffold
oneash98 Apr 16, 2026
6023bd6
.codex
oneash98 Apr 16, 2026
57619ca
Add cohort-method phenotype statement scaffold and recommendation flow
oneash98 Apr 20, 2026
46daab8
Refine cohort method analytic-settings shell UX
oneash98 Apr 24, 2026
1fae3d6
Add CohortMethod analysis settings JSON output
oneash98 Apr 27, 2026
c0790fc
Add cohort methods intent split and sequential improvement flow
oneash98 Apr 28, 2026
b154fc5
feat(core): add cohort method specs recommendation envelope models
burnout909 Apr 22, 2026
9aadd30
feat(core): add Theseus top-level and section validators
burnout909 Apr 22, 2026
95803eb
feat(core): add Theseus metadata merge and per-section backfill
burnout909 Apr 22, 2026
5eff00e
feat(mcp): add cohort_methods_prompt_bundle tool
burnout909 Apr 22, 2026
4843b41
feat(acp): add cohort methods specs recommendation flow
burnout909 Apr 22, 2026
12a52d1
feat(acp): register /flows/cohort_methods_specifications_recommendation
burnout909 Apr 22, 2026
af01d33
feat(R): add suggestCohortMethodSpecs wrapper
burnout909 Apr 22, 2026
460dc0b
docs: register cohort methods specs flow and add R usage example
burnout909 Apr 22, 2026
fbf8d47
test: add smoke task for cohort methods specs flow
burnout909 Apr 22, 2026
ab5188d
refactor(core): align cohort methods specs envelope with R shell cont…
burnout909 Apr 29, 2026
7b2a0e4
feat(core): add theseus_to_hanjae_recommendation projector
burnout909 Apr 29, 2026
7ce33a2
refactor(acp): rewrite cohort_methods_specs flow for R shell contract
burnout909 Apr 29, 2026
51818f7
refactor(acp): wire flat cohort-methods specs fields through HTTP route
burnout909 Apr 29, 2026
6f4fe96
refactor(R): align suggestCohortMethodSpecs with cohort-methods shell…
burnout909 Apr 29, 2026
12307e0
test: update cohort methods specs smoke payload to flat shell contract
burnout909 Apr 29, 2026
0336976
docs: refresh cohort methods specs flow registry and R usage example
burnout909 Apr 29, 2026
abae11f
fix(acp): use parsed_content/content_text in cohort-methods specs flow
Apr 29, 2026
8125578
docs: add cohort methods specs design, plan, test runbook, and handof…
Apr 29, 2026
97480d5
First generation CIPHER + OHDSI Phenotype library indexing approach. …
rkboyce Apr 29, 2026
640bde5
added ability for LLM suggestions of keywords for phenoytpes
rkboyce Apr 29, 2026
40e7128
Split LLM-assisted sparse and dense indexing into two steps and docum…
rkboyce Apr 30, 2026
6484b31
Progress on the agentic phenotype recommender using sparse and dense …
rkboyce May 1, 2026
548a900
More progress on phenotype recommendation indexing and agentic-based …
rkboyce May 2, 2026
7a42006
Update cohort methods recommendation prompt templates
oneash98 May 3, 2026
87554aa
Improve CohortMethod analytic settings ACP review flow
oneash98 May 3, 2026
89d733d
refactor(cohort-methods): align specs wrapper and validation naming
oneash98 May 3, 2026
04e14cc
Make script 06 run Strategus cohort method analysis
oneash98 May 3, 2026
6dae4cc
Clean up Strategus cohort methods shell docs and outputs
oneash98 May 3, 2026
7c53d8b
Fix intent split parsing and multi-outcome shell prompts
oneash98 May 4, 2026
894e567
Fix CohortMethod Strategus spec JSON serialization
oneash98 May 4, 2026
bbe7739
Add short analysis labels for CohortMethod shell, Create cohort metho…
oneash98 May 4, 2026
692bc5e
Refining the phenotype_recommendation agentic workflow further
rkboyce May 4, 2026
c074d78
Improvements after broader testing
rkboyce May 4, 2026
f463eae
Edited a testing file with cases recommended for initial human-review
rkboyce May 4, 2026
3fa6127
Completes the work on phenotype recommendation for this round. Adds e…
rkboyce May 4, 2026
203b557
Added an ACP endpoint to pull in phenotype metadata and definition JSON
rkboyce May 5, 2026
2e6c73b
Ported R script and test package to current ACP endpoints and service…
rkboyce May 5, 2026
ad2308d
bumped python version req to 3.12 and added omop-alchemy dependency (…
rkboyce May 5, 2026
d81f895
added a bash phenotype_recommendation benchmarking script
rkboyce May 6, 2026
e9ee1d2
Added Powershell and Bash benchmark scripts for phen rec
rkboyce May 6, 2026
3bb9a11
Added draft mermaid of the current phenotype recommendation flow
rkboyce May 7, 2026
95ea98a
format fix to mermaid
rkboyce May 7, 2026
58bec60
One more syntax fix
rkboyce May 7, 2026
850b343
Merge PR #38 cohort methods into atlas hardening branch
rkboyce May 9, 2026
5adcf75
adds [tests/cohort_methods_intent_split_smoke_test.py](/ai-agent/Hade…
rkboyce May 9, 2026
777d52a
Fix R cohort shell phenotype recommendation mapping
rkboyce May 9, 2026
bc4bee2
Merge branch 'main' into integration/pr38-cohort-methods
rkboyce May 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
demo-strategus-cohort-incidence/*
demo-strategus-cohort-methods/*
conda-pip-env-github-workflow.yml
*sandbox*
*.doit.db*
Expand All @@ -9,6 +10,12 @@ docs/study-agent-development-phenotype-suggestion-tool-architecture-plan.pdf
demo/417_Acute_gastrointestinal_bleeding_events.json
demo/299_Acute_gastrointestinal_bleeding_or_perforation_events.json
demo/1197_Acute_gastrointestinal_bleeding.json
test*.R
!R/OHDSIAssistant/tests/testthat.R
!R/OHDSIAssistant/tests/testthat/test-step-by-step-analytic-settings.R
.obsidian/
.DS_Store
.codex

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
1 change: 1 addition & 0 deletions CODING_AGENT_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ The currently exposed ACP flow endpoints are:
- `phenotype_recommendation_advice`
- `phenotype_improvements`
- `phenotype_intent_split`
- `cohort_methods_intent_split`
- `concept_sets_review`
- `cohort_critique_general_design`
- `keeper_concept_sets_generate`
Expand Down
76 changes: 76 additions & 0 deletions CohortMethod_HANDOFF.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@

## What Was Implemented

We added the current CohortMethod shell path around
`OHDSIAssistant::runStrategusCohortMethodsShell()`. In broad terms, the work now
supports:

- CohortMethod-specific intent splitting from one study-intent sentence into
target, comparator, and one or more outcome statements.
- We kept the existing `phenotype_intent_split` flow unchanged and added a
separate `cohort_methods_intent_split` flow. Please decide whether these
should remain separate or be consolidated later.
- Phenotype recommendation and optional improvement for target, comparator, and
outcome cohorts.
- Negative-control and covariate concept-set selection remain future work. This
step currently implements only the scaffold; it should be completed once ACP
flows for suggesting negative controls and covariate concept sets are
available.
- CohortMethod analytic-settings collection through either `step_by_step` prompts
or `free_text` ACP recommendation.
- Generation of reproducible output artifacts, CohortMethod analytic-settings
JSON, and Strategus R scripts, including `scripts/06_cm_spec.R`.

### Main ACP/MCP/core additions:

- ACP flow endpoint: `/flows/cohort_methods_intent_split`
- ACP flow endpoint: `/flows/cohort_methods_specifications_recommendation`
- MCP tool: `cohort_methods_intent_split`
- MCP tool: `cohort_methods_prompt_bundle`
- R helper: `OHDSIAssistant::suggestCohortMethodSpecs()`
- R shell integration: `runStrategusCohortMethodsShell()`
- Prompt/template assets:
- CohortMethod intent-split overview/spec/schema assets.
- CohortMethod cmAnalysis template and instruction assets under
`mcp_server/prompts/cohort_methods/`.

## Read These For Details

- Shell workflow, output layout, generated scripts, current boundaries, and
analytic-settings prompt details:
- `docs/STRATEGUS_COHORT_METHODS_SHELL.md`
- End-to-end workflow diagrams:
- `docs/COHORT_METHODS_WORKFLOW.md`
- Free-text analytic-settings recommendation flow, endpoint shape, response
shape, and projection from cmAnalysis-style specs:
- `docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md`
- CohortMethod ACP/MCP service registry entries:
- `docs/SERVICE_REGISTRY.yaml`
- CohortMethod cmAnalysis prompt/template assets:
- `mcp_server/prompts/cohort_methods/cmAnalysis_template.json`
- `mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md`
- `mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md`
- `mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md`
- CohortMethod intent-split prompt/schema assets:
- `mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md`
- `mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md`
- `mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json`
- R usage examples:
- `R/OHDSIAssistant/README.md`
- Test and smoke-test commands:
- `docs/TESTING.md`

## Remaining TODO


Future CohortMethod coverage:

- Replace dummy negative-control and covariate concept-set artifacts with real
ACP/MCP-backed workflows.
- The current placeholder path cannot exclude high-correlation covariates, which can cause script `06_cm_spec.R` to fail when `errorOnHighCorrelation` is enabled.
- Properly implement `scripts/04_keeper_review.R` for CohortMethod outputs.
- Support multiple CohortMethod analyses.
- Add validation for cohort-method analytic-settings recommendations before
they are accepted into the shell/generated scripts.
2 changes: 2 additions & 0 deletions R/OHDSIAssistant/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ export(readStrategusDbDetails)
export(createStrategusConnectionDetails)
export(readStrategusExecutionSettings)
export(createStrategusExecutionSettings)
export(runStrategusCohortMethodsShell)
export(suggestCohortMethodSpecs)
210 changes: 210 additions & 0 deletions R/OHDSIAssistant/R/cohort_methods_workflow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#' Suggest cohort method study specifications from a free-text description.
#'
#' Calls the ACP flow `/flows/cohort_methods_specifications_recommendation`
#' and returns the cohort-methods recommendation, full analysis spec for
#' traceability, and per-section rationales. Falls back to a local stub
#' when `acp_state$url` is NULL.
#'
#' Both text arguments must be a single, non-missing value that is non-empty
#' after trimming whitespace. `NULL`, `NA`, zero-length and multi-element
#' inputs are rejected before any request is made.
#'
#' @param studyIntent protocol context string
#' @param analyticSettingsDescription free-text description of the study design
#' @param interactive when TRUE, prints a section summary (default: TRUE)
#' @return list response from ACP flow or local stub (returned invisibly)
#' @export
suggestCohortMethodSpecs <- function(studyIntent,
                                     analyticSettingsDescription,
                                     interactive = TRUE) {
  # Normalise an argument to a trimmed string, or NULL when it is unusable.
  # NA is checked explicitly because nzchar(NA) is TRUE, and the length check
  # keeps vectors away from scalar-only logic.
  clean_text <- function(x) {
    if (is.null(x) || length(x) != 1) return(NULL)
    text <- trimws(as.character(x))
    if (is.na(text) || !nzchar(text)) return(NULL)
    text
  }

  intent_text <- clean_text(studyIntent)
  if (is.null(intent_text)) {
    stop("Provide a non-empty studyIntent.")
  }
  settings_text <- clean_text(analyticSettingsDescription)
  if (is.null(settings_text)) {
    stop("Provide a non-empty analyticSettingsDescription.")
  }

  # `study_description` and `analytic_settings_description` deliberately carry
  # the same text, as in the original request body.
  body <- list(
    study_intent = intent_text,
    study_description = settings_text,
    analytic_settings_description = settings_text
  )

  # Use the ACP flow when a connection URL is configured, else the local stub.
  res <- if (!is.null(acp_state$url)) {
    .acp_post("/flows/cohort_methods_specifications_recommendation", body)
  } else {
    local_cohort_method_specs(body)
  }

  if (isTRUE(interactive)) {
    cat("\n== Cohort Method Specifications ==\n")
    cat("Status:", res$status %||% "(missing)", "\n")
    rec <- res$recommendation %||% list()
    if (length(rec) > 0) {
      cat("Profile:", rec$profile_name %||% "(none)", "\n")
      cat("Recommendation status:", rec$status %||% "(none)", "\n")
    }
    # One line per named section rationale.
    rats <- res$section_rationales %||% list()
    if (length(rats) > 0) {
      for (section in names(rats)) {
        entry <- rats[[section]]
        cat(sprintf(" - %s: confidence=%s %s\n",
                    section,
                    entry$confidence %||% "?",
                    entry$rationale %||% ""))
      }
    }
    # Sections listed under diagnostics$failed_sections are reported as
    # backfilled.
    failed <- res$diagnostics$failed_sections %||% list()
    if (length(failed) > 0) {
      cat("Backfilled sections:", paste(unlist(failed), collapse = ", "), "\n")
    }
    .studyAgentPrintCohortMethodSpecsSummary(rec)
  }
  invisible(res)
}

# Report whether a spec field carries a usable value.
# NULL, zero-length objects and a single atomic NA count as absent; anything
# else (including "", 0, FALSE and list(NA)) counts as present.
.studyAgentCmSpecIsPresent <- function(value) {
  n <- length(value)
  # NULL has length 0, so this also covers the NULL case.
  if (n == 0) {
    return(FALSE)
  }
  is_scalar_na <- n == 1 && is.atomic(value) && is.na(value)
  !is_scalar_na
}

# Format one spec field for display in the printed summary.
#
# value: the raw field value (any type).
# path:  optional field key selecting a label table for single strings
#        ("startAnchor", "endAnchor", "ps_strategy", "caliperScale",
#        "modelType", "removeDuplicateSubjects").
#
# Returns a single string:
#   - "<not set>" when the value is absent (NULL, empty, or a lone NA);
#   - "<blank>" for a whitespace-only string;
#   - "Yes"/"No" for a single logical;
#   - the mapped label for a single string found in the table for `path`,
#     otherwise the string itself, unchanged;
#   - a compact number for a single numeric;
#   - otherwise the elements joined with ", ".
.studyAgentCmSpecValue <- function(value, path = NULL) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
  if (!.studyAgentCmSpecIsPresent(value)) return("<not set>")
  if (is.character(value) && length(value) == 1 && !nzchar(trimws(value))) return("<blank>")
  if (is.logical(value) && length(value) == 1) return(if (isTRUE(value)) "Yes" else "No")
  if (is.character(value) && length(value) == 1) {
    anchor_labels <- c("cohort start" = "cohort start date", "cohort end" = "cohort end date")
    labels <- switch(
      path %||% "",
      "startAnchor" = ,
      "endAnchor" = anchor_labels,
      "ps_strategy" = c("match_on_ps" = "Match on propensity score", "stratify_by_ps" = "Stratify on propensity score", "none" = "None"),
      "caliperScale" = c("propensity score" = "Propensity score", "standardized" = "Standardized", "standardized logit" = "Standardized logit"),
      "modelType" = c("cox" = "Cox proportional hazards", "poisson" = "Poisson regression", "logistic" = "Logistic regression"),
      "removeDuplicateSubjects" = c(
        "keep all" = "Keep All",
        "keep first" = "Keep First",
        "remove all" = "Remove All",
        "keep first, truncate to second" = "Keep First, Truncate to Second"
      ),
      NULL
    )
    if (!is.null(labels)) {
      # Single-bracket lookup yields NA for an unknown name, whereas `[[`
      # raises "subscript out of bounds"; unmapped values must fall through
      # and be shown verbatim rather than abort the summary.
      mapped <- unname(labels[value])
      if (length(mapped) == 1 && !is.na(mapped)) return(mapped)
    }
    return(value)
  }
  if (is.numeric(value) && length(value) == 1) {
    return(trimws(formatC(as.numeric(value), format = "fg", digits = 10)))
  }
  paste(as.character(value), collapse = ", ")
}

# TRUE when `args` is a list whose `prior` entry is present, FALSE otherwise.
# The summary printer displays this as "Use regularization".
.studyAgentCmSpecRegularized <- function(args) {
  is.list(args) && .studyAgentCmSpecIsPresent(args$prior)
}

# Derive the propensity-score strategy key from the PS section.
# Returns "match_on_ps" when `matchOnPsArgs` is present, otherwise
# "stratify_by_ps" when `stratifyByPsArgs` is present, otherwise "none"
# (also for non-list input). Matching wins when both are present.
.studyAgentCmSpecPsStrategy <- function(ps) {
  strategy <- "none"
  if (is.list(ps)) {
    if (.studyAgentCmSpecIsPresent(ps$matchOnPsArgs)) {
      strategy <- "match_on_ps"
    } else if (.studyAgentCmSpecIsPresent(ps$stratifyByPsArgs)) {
      strategy <- "stratify_by_ps"
    }
  }
  strategy
}

# Describe the PS trimming setting as a display string.
# Returns "By equipoise" when `equipoiseBounds` is present, "By percent (N%)"
# when `trimFraction` converts to a single positive number, and "None" in all
# other cases (including non-list or empty input).
.studyAgentCmSpecTrimming <- function(trim_args) {
  no_trimming <- "None"
  if (!is.list(trim_args) || length(trim_args) == 0) {
    return(no_trimming)
  }
  if (.studyAgentCmSpecIsPresent(trim_args$equipoiseBounds)) {
    return("By equipoise")
  }
  if (!.studyAgentCmSpecIsPresent(trim_args$trimFraction)) {
    return(no_trimming)
  }
  # Non-numeric text becomes NA here; the coercion warning is suppressed.
  pct <- suppressWarnings(as.numeric(trim_args$trimFraction))
  usable <- length(pct) == 1 && !is.na(pct) && pct > 0
  if (!usable) {
    return(no_trimming)
  }
  shown <- trimws(formatC(pct * 100, format = "fg", digits = 6))
  sprintf("By percent (%s%%)", shown)
}

# Print one summary section to the console: a "[title]" header line followed
# by one " - label: value" line per row. Each row is a two-element container
# holding the label first and the already-formatted value second.
.studyAgentCmSpecPrintSection <- function(title, rows) {
  cat(sprintf("[%s]\n", title))
  for (idx in seq_along(rows)) {
    entry <- rows[[idx]]
    label <- entry[[1]]
    shown <- entry[[2]]
    cat(sprintf(" - %s: %s\n", label, shown))
  }
}

# Print a four-section, human-readable summary of a cohort-method
# recommendation (Study Population, Time At Risk, Propensity Score
# Adjustment, Outcome Model).
#
# Prints nothing when `recommendation` is not a list or when all four section
# entries are empty. Always returns NULL invisibly.
.studyAgentPrintCohortMethodSpecsSummary <- function(recommendation) {
  if (!is.list(recommendation)) return(invisible(NULL))

  population <- recommendation$study_population %||% list()
  tar <- recommendation$time_at_risk %||% list()
  ps_adjust <- recommendation$propensity_score_adjustment %||% list()
  outcome_model <- recommendation$outcome_model %||% list()
  cm_data <- population$cohortMethodDataArgs %||% list()

  section_sizes <- c(
    length(population),
    length(tar),
    length(ps_adjust),
    length(outcome_model)
  )
  if (all(section_sizes == 0)) return(invisible(NULL))

  # Build one (label, formatted value) row; `path` selects a label table.
  field <- function(label, value, path = NULL) {
    list(label, .studyAgentCmSpecValue(value, path))
  }

  cat("\n")
  .studyAgentCmSpecPrintSection("Study Population", list(
    field("Study start date", cm_data$studyStartDate),
    field("Study end date", cm_data$studyEndDate),
    field("Restrict to common period", cm_data$restrictToCommonPeriod),
    field("First exposure only", cm_data$firstExposureOnly),
    field("Washout period", cm_data$washoutPeriod),
    field("Remove duplicate subjects", cm_data$removeDuplicateSubjects, "removeDuplicateSubjects"),
    field("Censor at new risk window", population$censorAtNewRiskWindow),
    field("Remove prior outcomes", population$removeSubjectsWithPriorOutcome),
    field("Prior outcome lookback", population$priorOutcomeLookback),
    field("Maximum cohort size", cm_data$maxCohortSize)
  ))
  .studyAgentCmSpecPrintSection("Time At Risk", list(
    # Minimum days at risk is read from the study-population section.
    field("Minimum days at risk", population$minDaysAtRisk),
    field("Risk window start", tar$riskWindowStart),
    field("Risk window start anchor", tar$startAnchor, "startAnchor"),
    field("Risk window end", tar$riskWindowEnd),
    field("Risk window end anchor", tar$endAnchor, "endAnchor")
  ))
  .studyAgentCmSpecPrintSection("Propensity Score Adjustment", list(
    # Trimming is already a display string, so it bypasses `field`.
    list("PS trimming", .studyAgentCmSpecTrimming(ps_adjust$trimByPsArgs)),
    field("PS adjustment strategy", .studyAgentCmSpecPsStrategy(ps_adjust), "ps_strategy"),
    field("Max cohort size for PS fitting", ps_adjust$createPsArgs$maxCohortSizeForFitting),
    field("Test covariate correlation", ps_adjust$createPsArgs$errorOnHighCorrelation),
    field("Use regularization", .studyAgentCmSpecRegularized(ps_adjust$createPsArgs)),
    field("Maximum match ratio", ps_adjust$matchOnPsArgs$maxRatio),
    field("Matching caliper", ps_adjust$matchOnPsArgs$caliper),
    field("Caliper scale", ps_adjust$matchOnPsArgs$caliperScale, "caliperScale")
  ))
  .studyAgentCmSpecPrintSection("Outcome Model", list(
    field("Outcome model", outcome_model$modelType, "modelType"),
    field("Condition on strata", outcome_model$stratified),
    field("Use covariates in outcome model", outcome_model$useCovariates),
    field("Use IPTW", outcome_model$inversePtWeighting),
    field("Use regularization", .studyAgentCmSpecRegularized(outcome_model))
  ))
  invisible(NULL)
}

# Build the offline placeholder response used when no ACP URL is configured.
#
# body: the request list that would have been posted; its
#       `analytic_settings_description` is echoed as `raw_description`
#       ("" when absent) and the whole list is returned under `request`.
#
# Returns a list with the top-level keys source, status, recommendation,
# cohort_methods_specifications, section_rationales, diagnostics and request,
# with all four setting sections left empty.
local_cohort_method_specs <- function(body) {
  stub_source <- "local_stub_no_acp"
  stub_status <- "stub"

  deferred <- list(
    function_argument_description = "implemented",
    description_file_path = "implemented",
    interactive_typed_description = "implemented"
  )

  recommendation <- list(
    mode = "free_text",
    input_method = "typed_text",
    source = stub_source,
    status = stub_status,
    profile_name = "Recommended from free-text description (stub)",
    raw_description = body$analytic_settings_description %||% "",
    study_population = list(),
    time_at_risk = list(),
    propensity_score_adjustment = list(),
    outcome_model = list(),
    deferred_inputs = deferred,
    defaults_snapshot = list()
  )

  diagnostics <- list(
    source = stub_source,
    reason = "acp_state$url is NULL; call acp_connect(url) first."
  )

  list(
    source = "stub_no_acp",
    status = stub_status,
    recommendation = recommendation,
    cohort_methods_specifications = list(),
    section_rationales = list(),
    diagnostics = diagnostics,
    request = body
  )
}
8 changes: 7 additions & 1 deletion R/OHDSIAssistant/R/execution_settings.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
workFolder <- cfg$workFolder
resultsFolder <- cfg$resultsFolder
cohortIdFieldName <- cfg$cohortIdFieldName %||% "cohort_definition_id"
maxCores <- cfg$maxCores %||% parallel::detectCores()
maxCores <- suppressWarnings(as.integer(maxCores)[1])
if (is.na(maxCores) || maxCores < 1L) maxCores <- 1L

if (!nzchar(cdmDatabaseSchema)) stop("cdmDatabaseSchema must be provided in strategus-execution-settings.json")
if (!nzchar(workDatabaseSchema)) stop("workDatabaseSchema must be provided in strategus-execution-settings.json")
Expand All @@ -38,8 +41,10 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
executionSettings <- createCdmExecutionSettings(
cdmDatabaseSchema = cdmDatabaseSchema,
workDatabaseSchema = workDatabaseSchema,
cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable),
workFolder = workFolder,
resultsFolder = resultsFolder
resultsFolder = resultsFolder,
maxCores = maxCores
)

list(
Expand All @@ -51,6 +56,7 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
cohortTable = cohortTable,
workFolder = workFolder,
resultsFolder = resultsFolder,
maxCores = maxCores,
cohortIdFieldName = cohortIdFieldName
)
}
Loading
Loading