OHDSI · rkboyce · May 9, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 16, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 demo-strategus-cohort-incidence/*
+demo-strategus-cohort-methods/*
 conda-pip-env-github-workflow.yml
 *sandbox*
 *.doit.db*
@@ -9,6 +10,12 @@ docs/study-agent-development-phenotype-suggestion-tool-architecture-plan.pdf
 demo/417_Acute_gastrointestinal_bleeding_events.json
 demo/299_Acute_gastrointestinal_bleeding_or_perforation_events.json
 demo/1197_Acute_gastrointestinal_bleeding.json
+test*.R
+!R/OHDSIAssistant/tests/testthat.R
+!R/OHDSIAssistant/tests/testthat/test-step-by-step-analytic-settings.R
+.obsidian/
+.DS_Store
+.codex
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/CODING_AGENT_README.md b/CODING_AGENT_README.md
@@ -60,6 +60,7 @@ The currently exposed ACP flow endpoints are:
 - `phenotype_recommendation_advice`
 - `phenotype_improvements`
 - `phenotype_intent_split`
+- `cohort_methods_intent_split`
 - `concept_sets_review`
 - `cohort_critique_general_design`
 - `keeper_concept_sets_generate`

diff --git a/CohortMethod_HANDOFF.md b/CohortMethod_HANDOFF.md
@@ -0,0 +1,76 @@
+
+## What Was Implemented
+
+We added the current CohortMethod shell path around
+`OHDSIAssistant::runStrategusCohortMethodsShell()`. In broad terms, the work now
+supports:
+
+- CohortMethod-specific intent splitting from one study-intent sentence into
+  target, comparator, and one or more outcome statements.
+  - We kept the existing `phenotype_intent_split` flow unchanged and added a
+    separate `cohort_methods_intent_split` flow. Please decide whether these
+    should remain separate or be consolidated later.
+- Phenotype recommendation and optional improvement for target, comparator, and
+  outcome cohorts.
+- Negative-control and covariate concept-set selection remain future work. This
+  step currently implements only the scaffold; it should be completed once ACP
+  flows for suggesting negative controls and covariate concept sets are
+  available.
+- CohortMethod analytic-settings collection through either `step_by_step` prompts
+  or `free_text` ACP recommendation.
+- Generation of reproducible output artifacts, CohortMethod analytic-settings
+  JSON, and Strategus R scripts including `scripts/06_cm_spec.R`.
+
+### Main ACP/MCP/core additions:
+
+- ACP flow endpoints:
+  - `/flows/cohort_methods_intent_split`
+  - `/flows/cohort_methods_specifications_recommendation`
+- MCP tools:
+  - `cohort_methods_intent_split`
+  - `cohort_methods_prompt_bundle`
+- R helper: `OHDSIAssistant::suggestCohortMethodSpecs()`
+- R shell integration: `runStrategusCohortMethodsShell()`
+- Prompt/template assets:
+  - CohortMethod intent-split overview/spec/schema assets.
+  - CohortMethod cmAnalysis template and instruction assets under
+    `mcp_server/prompts/cohort_methods/`.
+
+## Read These For Details
+
+- Shell workflow, output layout, generated scripts, current boundaries, and
+  analytic-settings prompt details:
+  - `docs/STRATEGUS_COHORT_METHODS_SHELL.md`
+- End-to-end workflow diagrams:
+  - `docs/COHORT_METHODS_WORKFLOW.md`
+- Free-text analytic-settings recommendation flow, endpoint shape, response
+  shape, and projection from cmAnalysis-style specs:
+  - `docs/COHORT_METHODS_SPECIFICATIONS_RECOMMENDATION_DESIGN.md`
+- CohortMethod ACP/MCP service registry entries:
+  - `docs/SERVICE_REGISTRY.yaml`
+- CohortMethod cmAnalysis prompt/template assets:
+  - `mcp_server/prompts/cohort_methods/cmAnalysis_template.json`
+  - `mcp_server/prompts/cohort_methods/CM_ANALYSIS_TEMPLATE.md`
+  - `mcp_server/prompts/cohort_methods/instruction_cohort_methods_specs.md`
+  - `mcp_server/prompts/cohort_methods/output_style_cohort_methods_specs.md`
+- CohortMethod intent-split prompt/schema assets:
+  - `mcp_server/prompts/phenotype/overview_cohort_methods_intent_split.md`
+  - `mcp_server/prompts/phenotype/spec_cohort_methods_intent_split.md`
+  - `mcp_server/prompts/phenotype/output_schema_cohort_methods_intent_split.json`
+- R usage examples:
+  - `R/OHDSIAssistant/README.md`
+- Test and smoke-test commands:
+  - `docs/TESTING.md`
+
+## Remaining TODO
+
+
+Future CohortMethod coverage:
+
+- Replace dummy negative-control and covariate concept-set artifacts with real
+  ACP/MCP-backed workflows.
+  - The current placeholder path cannot exclude high-correlation covariates,
+    which can cause `scripts/06_cm_spec.R` to fail when
+    `errorOnHighCorrelation` is enabled.
+- Properly implement `scripts/04_keeper_review.R` for CohortMethod outputs.
+- Support multiple CohortMethod analyses.
+- Add validation for cohort-method analytic-settings recommendations before
+  they are accepted into the shell/generated scripts.
diff --git a/R/OHDSIAssistant/NAMESPACE b/R/OHDSIAssistant/NAMESPACE
@@ -14,3 +14,5 @@ export(readStrategusDbDetails)
 export(createStrategusConnectionDetails)
 export(readStrategusExecutionSettings)
 export(createStrategusExecutionSettings)
+export(runStrategusCohortMethodsShell)
+export(suggestCohortMethodSpecs)
diff --git a/R/OHDSIAssistant/R/cohort_methods_workflow.R b/R/OHDSIAssistant/R/cohort_methods_workflow.R
@@ -0,0 +1,210 @@
+#' Suggest cohort method study specifications from a free-text description.
+#'
+#' Calls the ACP flow `/flows/cohort_methods_specifications_recommendation`
+#' when `acp_state$url` is set; otherwise falls back to the local stub
+#' `local_cohort_method_specs()`. When `interactive` is TRUE, the response
+#' status, the recommendation profile and status, the per-section rationales,
+#' any sections listed under `diagnostics$failed_sections`, and a formatted
+#' settings summary are printed to the console.
+#'
+#' @param studyIntent protocol context string; must be a single, non-missing,
+#'   non-blank value
+#' @param analyticSettingsDescription free-text description of the study
+#'   design; must be a single, non-missing, non-blank value
+#' @param interactive when TRUE, prints a section summary (default: TRUE)
+#' @return list response from ACP flow or local stub, returned invisibly
+#' @export
+suggestCohortMethodSpecs <- function(studyIntent,
+                                     analyticSettingsDescription,
+                                     interactive = TRUE) {
+  # Coerce an argument to one trimmed string, or NULL when it is unusable.
+  # NA and non-scalar values are rejected explicitly: nzchar(NA) is TRUE, so a
+  # bare nzchar() check lets a missing value through to the request body, and
+  # zero-length or length > 1 inputs make `||` fail with an unrelated error.
+  as_single_text <- function(value) {
+    if (is.null(value) || length(value) != 1) return(NULL)
+    text <- trimws(as.character(value))
+    if (is.na(text) || !nzchar(text)) return(NULL)
+    text
+  }
+
+  intent_text <- as_single_text(studyIntent)
+  if (is.null(intent_text)) {
+    stop("Provide a non-empty studyIntent.")
+  }
+  settings_text <- as_single_text(analyticSettingsDescription)
+  if (is.null(settings_text)) {
+    stop("Provide a non-empty analyticSettingsDescription.")
+  }
+
+  # The description is sent under both `study_description` and
+  # `analytic_settings_description`.
+  # NOTE(review): presumably the flow accepts either key -- confirm which one
+  # the ACP endpoint actually reads before dropping the duplicate.
+  body <- list(
+    study_intent = intent_text,
+    study_description = settings_text,
+    analytic_settings_description = settings_text
+  )
+
+  res <- if (!is.null(acp_state$url)) {
+    .acp_post("/flows/cohort_methods_specifications_recommendation", body)
+  } else {
+    local_cohort_method_specs(body)
+  }
+
+  if (isTRUE(interactive)) {
+    cat("\n== Cohort Method Specifications ==\n")
+    cat("Status:", res$status %||% "(missing)", "\n")
+    rec <- res$recommendation %||% list()
+    if (length(rec) > 0) {
+      cat("Profile:", rec$profile_name %||% "(none)", "\n")
+      cat("Recommendation status:", rec$status %||% "(none)", "\n")
+    }
+    rats <- res$section_rationales %||% list()
+    if (length(rats) > 0) {
+      for (section in names(rats)) {
+        entry <- rats[[section]]
+        cat(sprintf("  - %s: confidence=%s  %s\n",
+                    section,
+                    entry$confidence %||% "?",
+                    entry$rationale %||% ""))
+      }
+    }
+    failed <- res$diagnostics$failed_sections %||% list()
+    if (length(failed) > 0) {
+      cat("Backfilled sections:", paste(unlist(failed), collapse = ", "), "\n")
+    }
+    .studyAgentPrintCohortMethodSpecsSummary(rec)
+  }
+  invisible(res)
+}
+
+# Report whether a spec value carries any content: FALSE for NULL, for
+# zero-length values, and for a single atomic NA; TRUE for everything else.
+.studyAgentCmSpecIsPresent <- function(value) {
+  # length(NULL) is 0, so this one test covers both NULL and empty values.
+  if (length(value) == 0) {
+    return(FALSE)
+  }
+  lone_missing <- length(value) == 1 && is.atomic(value) && is.na(value)
+  !lone_missing
+}
+
+# Format one spec value as display text for the settings summary.
+#
+# value: the raw setting (any type, possibly NULL/NA).
+# path:  optional name of the setting; when it names one of the label tables
+#        below and `value` is a key in that table, the friendly label is
+#        returned instead of the raw string.
+#
+# Returns a single string: "<not set>" for absent values, "<blank>" for a
+# whitespace-only string, "Yes"/"No" for a single logical, a compact number
+# for a single numeric, the mapped label or the raw string for a single
+# character value, and a comma-separated list for anything longer.
+.studyAgentCmSpecValue <- function(value, path = NULL) {
+  if (!.studyAgentCmSpecIsPresent(value)) return("<not set>")
+
+  is_scalar <- length(value) == 1
+  if (is_scalar && is.character(value)) {
+    if (!nzchar(trimws(value))) return("<blank>")
+
+    anchor_labels <- c(
+      "cohort start" = "cohort start date",
+      "cohort end" = "cohort end date"
+    )
+    label_tables <- list(
+      startAnchor = anchor_labels,
+      endAnchor = anchor_labels,
+      ps_strategy = c(
+        "match_on_ps" = "Match on propensity score",
+        "stratify_by_ps" = "Stratify on propensity score",
+        "none" = "None"
+      ),
+      caliperScale = c(
+        "propensity score" = "Propensity score",
+        "standardized" = "Standardized",
+        "standardized logit" = "Standardized logit"
+      ),
+      modelType = c(
+        "cox" = "Cox proportional hazards",
+        "poisson" = "Poisson regression",
+        "logistic" = "Logistic regression"
+      ),
+      removeDuplicateSubjects = c(
+        "keep all" = "Keep All",
+        "keep first" = "Keep First",
+        "remove all" = "Remove All",
+        "keep first, truncate to second" = "Keep First, Truncate to Second"
+      )
+    )
+
+    has_table <- is.character(path) && length(path) == 1 &&
+      !is.na(path) && path %in% names(label_tables)
+    if (has_table) {
+      labels <- label_tables[[path]]
+      # Test membership before extracting: `[[` on a named character vector
+      # raises "subscript out of bounds" for an unknown key, which would abort
+      # the whole summary instead of falling back to the raw value.
+      if (value %in% names(labels)) return(labels[[value]])
+    }
+    return(value)
+  }
+  if (is_scalar && is.logical(value)) {
+    return(if (isTRUE(value)) "Yes" else "No")
+  }
+  if (is_scalar && is.numeric(value)) {
+    return(trimws(formatC(as.numeric(value), format = "fg", digits = 10)))
+  }
+  paste(as.character(value), collapse = ", ")
+}
+
+# TRUE when `args` is a list whose `prior` element is present, FALSE otherwise.
+.studyAgentCmSpecRegularized <- function(args) {
+  is.list(args) && .studyAgentCmSpecIsPresent(args$prior)
+}
+
+# Derive the propensity-score strategy key from a PS settings list:
+# "match_on_ps" when `matchOnPsArgs` is present, else "stratify_by_ps" when
+# `stratifyByPsArgs` is present, else "none" (also for non-list input).
+.studyAgentCmSpecPsStrategy <- function(ps) {
+  strategy <- "none"
+  if (is.list(ps)) {
+    # Matching takes precedence when both argument sets are supplied.
+    if (.studyAgentCmSpecIsPresent(ps$matchOnPsArgs)) {
+      strategy <- "match_on_ps"
+    } else if (.studyAgentCmSpecIsPresent(ps$stratifyByPsArgs)) {
+      strategy <- "stratify_by_ps"
+    }
+  }
+  strategy
+}
+
+# Describe the PS trimming setting as display text: "By equipoise" when
+# `equipoiseBounds` is present, "By percent (N%)" when `trimFraction` is a
+# single positive number, and "None" in every other case.
+.studyAgentCmSpecTrimming <- function(trim_args) {
+  label <- "None"
+  if (is.list(trim_args) && length(trim_args) > 0) {
+    if (.studyAgentCmSpecIsPresent(trim_args$equipoiseBounds)) {
+      label <- "By equipoise"
+    } else if (.studyAgentCmSpecIsPresent(trim_args$trimFraction)) {
+      # Coercion warnings are suppressed; a non-numeric fraction becomes NA
+      # and falls through to "None".
+      share <- suppressWarnings(as.numeric(trim_args$trimFraction))
+      if (length(share) == 1 && !is.na(share) && share > 0) {
+        percent_text <- trimws(formatC(share * 100, format = "fg", digits = 6))
+        label <- sprintf("By percent (%s%%)", percent_text)
+      }
+    }
+  }
+  label
+}
+
+# Print a bracketed section title followed by one "  - label: value" line per
+# row. Each row is a two-element list: label first, display value second.
+.studyAgentCmSpecPrintSection <- function(title, rows) {
+  cat(sprintf("[%s]\n", title))
+  # `rows` is first touched here, after the header has been written.
+  lapply(rows, function(pair) {
+    cat(sprintf("  - %s: %s\n", pair[[1]], pair[[2]]))
+  })
+  invisible(NULL)
+}
+
+# Print a four-section console summary (Study Population, Time At Risk,
+# Propensity Score Adjustment, Outcome Model) of a cohort-method
+# recommendation list. Prints nothing when `recommendation` is not a list or
+# when all four of its settings sections are empty. Always returns NULL
+# invisibly.
+.studyAgentPrintCohortMethodSpecsSummary <- function(recommendation) {
+  if (!is.list(recommendation)) return(invisible(NULL))
+
+  population <- recommendation$study_population %||% list()
+  tar <- recommendation$time_at_risk %||% list()
+  ps_adjust <- recommendation$propensity_score_adjustment %||% list()
+  outcome_model <- recommendation$outcome_model %||% list()
+  cm_data <- population$cohortMethodDataArgs %||% list()
+
+  section_sizes <- lengths(list(population, tar, ps_adjust, outcome_model))
+  if (all(section_sizes == 0)) {
+    return(invisible(NULL))
+  }
+
+  # Short local aliases keep the row tables below readable.
+  fmt <- .studyAgentCmSpecValue
+  item <- function(label, text) list(label, text)
+
+  cat("\n")
+  .studyAgentCmSpecPrintSection("Study Population", list(
+    item("Study start date", fmt(cm_data$studyStartDate)),
+    item("Study end date", fmt(cm_data$studyEndDate)),
+    item("Restrict to common period", fmt(cm_data$restrictToCommonPeriod)),
+    item("First exposure only", fmt(cm_data$firstExposureOnly)),
+    item("Washout period", fmt(cm_data$washoutPeriod)),
+    item(
+      "Remove duplicate subjects",
+      fmt(cm_data$removeDuplicateSubjects, "removeDuplicateSubjects")
+    ),
+    item("Censor at new risk window", fmt(population$censorAtNewRiskWindow)),
+    item("Remove prior outcomes", fmt(population$removeSubjectsWithPriorOutcome)),
+    item("Prior outcome lookback", fmt(population$priorOutcomeLookback)),
+    item("Maximum cohort size", fmt(cm_data$maxCohortSize))
+  ))
+  .studyAgentCmSpecPrintSection("Time At Risk", list(
+    item("Minimum days at risk", fmt(population$minDaysAtRisk)),
+    item("Risk window start", fmt(tar$riskWindowStart)),
+    item("Risk window start anchor", fmt(tar$startAnchor, "startAnchor")),
+    item("Risk window end", fmt(tar$riskWindowEnd)),
+    item("Risk window end anchor", fmt(tar$endAnchor, "endAnchor"))
+  ))
+  .studyAgentCmSpecPrintSection("Propensity Score Adjustment", list(
+    item("PS trimming", .studyAgentCmSpecTrimming(ps_adjust$trimByPsArgs)),
+    item(
+      "PS adjustment strategy",
+      fmt(.studyAgentCmSpecPsStrategy(ps_adjust), "ps_strategy")
+    ),
+    item(
+      "Max cohort size for PS fitting",
+      fmt(ps_adjust$createPsArgs$maxCohortSizeForFitting)
+    ),
+    item(
+      "Test covariate correlation",
+      fmt(ps_adjust$createPsArgs$errorOnHighCorrelation)
+    ),
+    item(
+      "Use regularization",
+      fmt(.studyAgentCmSpecRegularized(ps_adjust$createPsArgs))
+    ),
+    item("Maximum match ratio", fmt(ps_adjust$matchOnPsArgs$maxRatio)),
+    item("Matching caliper", fmt(ps_adjust$matchOnPsArgs$caliper)),
+    item(
+      "Caliper scale",
+      fmt(ps_adjust$matchOnPsArgs$caliperScale, "caliperScale")
+    )
+  ))
+  .studyAgentCmSpecPrintSection("Outcome Model", list(
+    item("Outcome model", fmt(outcome_model$modelType, "modelType")),
+    item("Condition on strata", fmt(outcome_model$stratified)),
+    item("Use covariates in outcome model", fmt(outcome_model$useCovariates)),
+    item("Use IPTW", fmt(outcome_model$inversePtWeighting)),
+    item("Use regularization", fmt(.studyAgentCmSpecRegularized(outcome_model)))
+  ))
+  invisible(NULL)
+}
+
+# Build the placeholder response used when no ACP server is configured. It
+# has the same top-level fields the interactive printer reads from a response
+# (status, recommendation, section_rationales, diagnostics), with every
+# settings section empty, and echoes the request `body` back under `request`.
+local_cohort_method_specs <- function(body) {
+  description <- body$analytic_settings_description %||% ""
+
+  stub_recommendation <- list(
+    mode = "free_text",
+    input_method = "typed_text",
+    source = "local_stub_no_acp",
+    status = "stub",
+    profile_name = "Recommended from free-text description (stub)",
+    raw_description = description,
+    study_population = list(),
+    time_at_risk = list(),
+    propensity_score_adjustment = list(),
+    outcome_model = list(),
+    deferred_inputs = list(
+      function_argument_description = "implemented",
+      description_file_path = "implemented",
+      interactive_typed_description = "implemented"
+    ),
+    defaults_snapshot = list()
+  )
+
+  stub_diagnostics <- list(
+    source = "local_stub_no_acp",
+    reason = "acp_state$url is NULL; call acp_connect(url) first."
+  )
+
+  list(
+    source = "stub_no_acp",
+    status = "stub",
+    recommendation = stub_recommendation,
+    cohort_methods_specifications = list(),
+    section_rationales = list(),
+    diagnostics = stub_diagnostics,
+    request = body
+  )
+}
diff --git a/R/OHDSIAssistant/R/execution_settings.R b/R/OHDSIAssistant/R/execution_settings.R
@@ -26,6 +26,9 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
   workFolder <- cfg$workFolder
   resultsFolder <- cfg$resultsFolder
   cohortIdFieldName <- cfg$cohortIdFieldName %||% "cohort_definition_id"
+  maxCores <- cfg$maxCores %||% parallel::detectCores()
+  maxCores <- suppressWarnings(as.integer(maxCores)[1])
+  if (is.na(maxCores) || maxCores < 1L) maxCores <- 1L
 
   if (!nzchar(cdmDatabaseSchema)) stop("cdmDatabaseSchema must be provided in strategus-execution-settings.json")
   if (!nzchar(workDatabaseSchema)) stop("workDatabaseSchema must be provided in strategus-execution-settings.json")
@@ -38,8 +41,10 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
   executionSettings <- createCdmExecutionSettings(
     cdmDatabaseSchema = cdmDatabaseSchema,
     workDatabaseSchema = workDatabaseSchema,
+    cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable),
     workFolder = workFolder,
-    resultsFolder = resultsFolder
+    resultsFolder = resultsFolder,
+    maxCores = maxCores
   )
 
   list(
@@ -51,6 +56,7 @@ createStrategusExecutionSettings <- function(path = file.path(getwd(), "strategu
     cohortTable = cohortTable,
     workFolder = workFolder,
     resultsFolder = resultsFolder,
+    maxCores = maxCores,
     cohortIdFieldName = cohortIdFieldName
   )
 }