diff --git a/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb b/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb index 6973dd7..68af87f 100644 --- a/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb +++ b/docs/source/tutorials/ROI_Mapping_Tutorial/ROI_Mapping_Tutorial.ipynb @@ -309,6 +309,219 @@ "You can add lesion or other one-off mappings using `manual_overrides`:" ] }, + { + "cell_type": "markdown", + "id": "42944b4d", + "metadata": {}, + "source": [ + "## Configuration: Valid Organ Names\n", + "\n", + "### Overview\n", + "\n", + "The `voi_mappings_config.json` file now includes a `valid_organ_names` section that defines which organ/VOI names are accepted as valid targets for mappings. This prevents invalid mappings from silently failing.\n", + "\n", + "### Why This Matters\n", + "\n", + "When you create a mapping like:\n", + "```json\n", + "\"spect_mappings\": {\n", + " \"Kidney_L_a\": \"Kidney_L\"\n", + "}\n", + "```\n", + "\n", + "The system validates that `\"Kidney_L\"` is in the list of `valid_organ_names`. If it's not, the mapping is rejected and the ROI stays unmapped (identity mapping). This helps catch typos and naming inconsistencies early.\n", + "\n", + "### Default Valid Organ Names (OLINDA-Compatible)\n", + "\n", + "The package template includes these OLINDA-compatible organ names by default. You can customize this list for your project:\n", + "\n", + "```json\n", + "{\n", + " \"valid_organ_names\": {\n", + " \"_description\": \"List of valid organ/VOI names for validation. These default names are compatible with OLINDA dosimetry calculations. 
Users can add custom organs as needed for their workflows.\",\n", + " \"names\": [\n", + " \"Kidney_Left\",\n", + " \"Kidney_Right\",\n", + " \"Liver\",\n", + " \"Spleen\",\n", + " \"Bladder\",\n", + " \"SubmandibularGland_Left\",\n", + " \"SubmandibularGland_Right\",\n", + " \"ParotidGland_Left\",\n", + " \"ParotidGland_Right\",\n", + " \"BoneMarrow\",\n", + " \"Skeleton\",\n", + " \"WholeBody\",\n", + " \"RemainderOfBody\",\n", + " \"TotalTumorBurden\"\n", + " ]\n", + " }\n", + "}\n", + "```\n", + "\n", + "### Customizing Valid Organ Names\n", + "\n", + "If your project uses different naming conventions or custom organs, add them to your project's `voi_mappings_config.json`:\n", + "\n", + "```json\n", + "{\n", + " \"valid_organ_names\": {\n", + " \"_description\": \"Custom organ names for our project\",\n", + " \"names\": [\n", + " \"Kidney_Left\",\n", + " \"Kidney_Right\",\n", + " \"kidney_cyst_left\",\n", + " \"kidney_cyst_right\",\n", + " \"MyCustomOrgan\",\n", + " \"Lesion_1\",\n", + " \"Lesion_2\"\n", + " ]\n", + " },\n", + " \"ct_mappings\": {...},\n", + " \"spect_mappings\": {...}\n", + "}\n", + "```\n", + "\n", + "### Loading Valid Organ Names in Your Code\n", + "\n", + "```python\n", + "from pytheranostics.imaging_ds import LongitudinalStudy\n", + "\n", + "# Get the current valid organ names (loads from config or uses defaults)\n", + "valid_organs = LongitudinalStudy._get_valid_organ_names()\n", + "print(\"Valid organs:\", valid_organs)\n", + "```\n", + "\n", + "The system searches for `voi_mappings_config.json` in this order:\n", + "1. **Current directory** (your notebook location)\n", + "2. **One level up** (project root)\n", + "3. 
**Package template** (OLINDA defaults)\n", + "\n", + "### Workflow Integration\n", + "\n", + "When you call `create_studies_with_masks()` with a mapping config:\n", + "\n", + "```python\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " mapping_config=\"./voi_mappings_config.json\" # ← Loads valid_organ_names too\n", + ")\n", + "```\n", + "\n", + "The mappings are read from the file you pass as `mapping_config`. The valid organ names are resolved separately, using the search order listed above (current directory, one level up, package template), and are cached after the first lookup. Keeping a single `voi_mappings_config.json` in your working directory, as in this example, means both are read from the same file and stay consistent across your project." + ] + }, + { + "cell_type": "markdown", + "id": "825a6f45", + "metadata": {}, + "source": [ + "## Configuration: Canonical Name Mappings\n", + "\n", + "### Overview\n", + "\n", + "The `canonical_mappings` section in `voi_mappings_config.json` defines automatic abbreviation normalization for the `auto_map=True` mode in `create_studies_with_masks()`. This is useful when your RTSTRUCT files use abbreviated names like `Kidney_L` but you want them automatically converted to the canonical form `Kidney_Left`.\n", + "\n", + "### How It Works\n", + "\n", + "When you set `auto_map=True`:\n", + "\n", + "```python\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " auto_map=True # ← Enables automatic canonical name mapping\n", + ")\n", + "```\n", + "\n", + "The system:\n", + "1. Loads canonical_mappings from your config file\n", + "2. 
For each ROI name in your RTSTRUCT:\n", + " - Strips modality suffixes (`_m`, `_a`)\n", + " - Looks up the base name in canonical_mappings\n", + " - Maps to the canonical name if found\n", + " - Keeps the name as-is if no mapping exists\n", + "\n", + "### Default Canonical Mappings\n", + "\n", + "The package template includes these common abbreviations:\n", + "\n", + "```json\n", + "{\n", + " \"canonical_mappings\": {\n", + " \"_description\": \"Best-effort ROI name normalization for auto_map mode. Maps abbreviated/common names to canonical organ names. Used when auto_map=True is set in create_studies_with_masks.\",\n", + " \"mappings\": {\n", + " \"Kidney_L\": \"Kidney_Left\",\n", + " \"Kidney_R\": \"Kidney_Right\",\n", + " \"Parotid_L\": \"ParotidGland_Left\",\n", + " \"Parotid_R\": \"ParotidGland_Right\",\n", + " \"Submandibular_L\": \"SubmandibularGland_Left\",\n", + " \"Submandibular_R\": \"SubmandibularGland_Right\",\n", + " \"WBCT\": \"WholeBody\"\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "### Customizing Canonical Mappings\n", + "\n", + "Add custom mappings to your project's `voi_mappings_config.json` for your institution's naming conventions:\n", + "\n", + "```json\n", + "{\n", + " \"canonical_mappings\": {\n", + " \"_description\": \"Custom mappings for our institution\",\n", + " \"mappings\": {\n", + " \"Kidney_L\": \"Kidney_Left\",\n", + " \"Kidney_R\": \"Kidney_Right\",\n", + " \"KL\": \"Kidney_Left\",\n", + " \"KR\": \"Kidney_Right\",\n", + " \"Parotid_L\": \"ParotidGland_Left\",\n", + " \"Parotid_R\": \"ParotidGland_Right\",\n", + " \"Liver_N\": \"Liver\",\n", + " \"Liver_C\": \"Liver\",\n", + " \"WBCT\": \"WholeBody\"\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "### When to Use: auto_map vs Explicit Mapping\n", + "\n", + "| Scenario | Approach | Example |\n", + "|----------|----------|---------|\n", + "| **Known abbreviations, consistent naming** | `auto_map=True` | RTSTRUCT names are always `Kidney_L`, `Kidney_R`, etc. 
→ auto converts to canonical |\n", + "| **Modality-specific names** | Explicit `ct_mappings`/`spect_mappings` | CT has `Kidney_L_m`, SPECT has `Kidney_L_a` → different mappings per modality |\n", + "| **Complex/variable naming** | Explicit mappings in config | ROIs named inconsistently across projects → use full `ct_mappings`/`spect_mappings` |\n", + "| **Mixed approach** | `auto_map=True` + explicit overrides | Use auto_map for most, but override specific conflicting names with explicit mappings |\n", + "\n", + "### Example Workflow: Auto-Map with Suffix Stripping\n", + "\n", + "```python\n", + "# RTSTRUCT contains: Kidney_L_a, Kidney_R_a, Kidney_L_m, Kidney_R_m\n", + "\n", + "longCT, longSPECT, inj, used = tx.imaging_ds.create_studies_with_masks(\n", + " storage_root=\"./data\",\n", + " patient_id=\"PATIENT_ID\",\n", + " cycle_no=1,\n", + " auto_map=True # Enables canonical_mappings\n", + ")\n", + "\n", + "# Results (from used_mappings):\n", + "# CT: Kidney_L_m → Kidney_Left, Kidney_R_m → Kidney_Right\n", + "# SPECT: Kidney_L_a → Kidney_Left, Kidney_R_a → Kidney_Right\n", + "```\n", + "\n", + "The system:\n", + "1. Strips `_m` and `_a` suffixes → `Kidney_L`, `Kidney_R`\n", + "2. Looks up in canonical_mappings → finds `Kidney_Left`, `Kidney_Right`\n", + "3. Validates against valid_organ_names → passes\n", + "4. 
Applies the mapping ✓" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/pytheranostics/imaging_ds/cycle_loader.py b/pytheranostics/imaging_ds/cycle_loader.py index 90d8a36..0dd7013 100644 --- a/pytheranostics/imaging_ds/cycle_loader.py +++ b/pytheranostics/imaging_ds/cycle_loader.py @@ -7,6 +7,7 @@ from __future__ import annotations +import json import re from pathlib import Path from typing import Dict, List, Optional, Tuple, Union @@ -224,26 +225,69 @@ def prepare_cycle_inputs( # --- New high-level orchestration API --------------------------------------------------------- +def _get_canonical_mappings() -> Dict[str, str]: + """Load canonical name mappings from config file. + + Searches for voi_mappings_config.json in order: + 1. Current directory (project-specific) + 2. One level up (project root) + 3. Package template (defaults) + + Returns + ------- + Dict[str, str] + Mapping of abbreviated/common names to canonical names. + Returns empty dict if no config found. 
+ """ + search_paths = [ + Path.cwd() / "voi_mappings_config.json", + Path.cwd().parent / "voi_mappings_config.json", + ] + + for config_path in search_paths: + if config_path.exists(): + try: + with open(config_path, "r") as f: + config = json.load(f) + if "canonical_mappings" in config: + canon_config = config["canonical_mappings"] + if isinstance(canon_config, dict): + return canon_config.get("mappings", {}) + except Exception: + continue + + # Try package template + try: + import importlib.resources as pkg_resources + + template_path = pkg_resources.files("pytheranostics.data").joinpath( + "configuration_templates/voi_mappings_config.json" + ) + with open(template_path, "r") as f: + config = json.load(f) + if "canonical_mappings" in config: + canon_config = config["canonical_mappings"] + if isinstance(canon_config, dict): + return canon_config.get("mappings", {}) + except Exception: + pass + + return {} + + def _canonical_mask_name(name: str) -> str: - """Map RTSTRUCT ROI names to canonical pyTheranostics mask names. + """Apply canonical name mappings from config. Best-effort normalization used for auto mapping. Keeps unknown names as-is. + Mappings are loaded from voi_mappings_config.json. 
""" # Strip modality suffixes often used in notebooks (e.g., _m for CT-based, _a for activity) base = name if base.endswith("_m") or base.endswith("_a"): base = base[:-2] - # Common synonyms/abbreviations - replacements = { - "Kidney_L": "Kidney_Left", - "Kidney_R": "Kidney_Right", - "Parotid_L": "ParotidGland_Left", - "Parotid_R": "ParotidGland_Right", - "Submandibular_L": "SubmandibularGland_Left", - "Submandibular_R": "SubmandibularGland_Right", - "WBCT": "WholeBody", - } + # Load canonical mappings from config + replacements = _get_canonical_mappings() return replacements.get(base, base) @@ -389,7 +433,7 @@ def create_studies_with_masks( apply_spect_mapping = (final_spect_mapping is not None) or auto_map def _is_valid_target(name: str) -> bool: - if name in LongitudinalStudy._VALID_ORGAN_NAMES: + if name in LongitudinalStudy._get_valid_organ_names(): return True return re.match(r"^Lesion_([1-9]\d*)$", name) is not None diff --git a/pytheranostics/imaging_ds/longitudinal_study.py b/pytheranostics/imaging_ds/longitudinal_study.py index 44edebc..a64499b 100644 --- a/pytheranostics/imaging_ds/longitudinal_study.py +++ b/pytheranostics/imaging_ds/longitudinal_study.py @@ -28,22 +28,80 @@ class LongitudinalStudy: of interest and meta-data. """ - _VALID_ORGAN_NAMES = [ - "Kidney_Left", - "Kidney_Right", - "Liver", - "Spleen", - "Bladder", - "SubmandibularGland_Left", - "SubmandibularGland_Right", - "ParotidGland_Left", - "ParotidGland_Right", - "BoneMarrow", - "Skeleton", - "WholeBody", - "RemainderOfBody", - "TotalTumorBurden", - ] + # Cached valid organ names loaded from config + _VALID_ORGAN_NAMES = None + + @classmethod + def _get_valid_organ_names(cls) -> List[str]: + """Get valid organ names from config file. + + Searches for voi_mappings_config.json in order: + 1. Current directory (project-specific config) + 2. One level up (project root) + 3. Package template (OLINDA-compatible defaults) + + Returns + ------- + List[str] + List of valid organ names. 
+ + Raises + ------ + FileNotFoundError + If no config file can be found. + ValueError + If config file doesn't contain valid_organ_names. + """ + if cls._VALID_ORGAN_NAMES is not None: + return cls._VALID_ORGAN_NAMES + + # Try project-specific configs first + search_paths = [ + Path.cwd() / "voi_mappings_config.json", + Path.cwd().parent / "voi_mappings_config.json", + ] + + for config_path in search_paths: + if config_path.exists(): + try: + with open(config_path, "r") as f: + config = json.load(f) + if "valid_organ_names" in config: + # Handle both old format (list) and new format (dict with names key) + organ_names = config["valid_organ_names"] + if isinstance(organ_names, dict): + cls._VALID_ORGAN_NAMES = organ_names.get("names", []) + else: + cls._VALID_ORGAN_NAMES = organ_names + return cls._VALID_ORGAN_NAMES + except Exception: + continue + + # Load from package template (OLINDA defaults) + try: + import importlib.resources as pkg_resources + + template_path = pkg_resources.files("pytheranostics.data").joinpath( + "configuration_templates/voi_mappings_config.json" + ) + with open(template_path, "r") as f: + config = json.load(f) + if "valid_organ_names" in config: + organ_names = config["valid_organ_names"] + if isinstance(organ_names, dict): + cls._VALID_ORGAN_NAMES = organ_names.get("names", []) + else: + cls._VALID_ORGAN_NAMES = organ_names + return cls._VALID_ORGAN_NAMES + except Exception as e: + raise FileNotFoundError( + "Could not load valid_organ_names from any config file. " + "Please ensure voi_mappings_config.json exists in your project or package." + ) from e + + raise ValueError( + "Config file found but does not contain 'valid_organ_names' section." + ) def __init__( self, @@ -196,10 +254,11 @@ def _is_valid_mask_name(mask_name: str) -> bool: """Check if a mask name is valid. 
Valid names are either: - - Standard organ names from _VALID_ORGAN_NAMES + - Standard organ names from config or default list - Lesion names in format 'Lesion_N' where N is a positive integer """ - if mask_name in LongitudinalStudy._VALID_ORGAN_NAMES: + valid_names = LongitudinalStudy._get_valid_organ_names() + if mask_name in valid_names: return True lesion_pattern = r"^Lesion_([1-9]\d*)$" return bool(re.match(lesion_pattern, mask_name)) diff --git a/tests/test_longitudinal_study.py b/tests/test_longitudinal_study.py index fba8834..4080571 100644 --- a/tests/test_longitudinal_study.py +++ b/tests/test_longitudinal_study.py @@ -103,8 +103,10 @@ def test_init_success_minimal(self): assert len(study.images) == 2 assert len(study.meta) == 2 assert len(study.masks) == 0 - assert isinstance(study._VALID_ORGAN_NAMES, list) - assert "Liver" in study._VALID_ORGAN_NAMES + # Test that _get_valid_organ_names() returns a list and contains expected organs + valid_organs = LongitudinalStudy._get_valid_organ_names() + assert isinstance(valid_organs, list) + assert "Liver" in valid_organs assert LongitudinalStudy._is_valid_mask_name("Lesion_1") def test_init_mismatched_keys_raises_error(self):