From 5121dfaa38d1d02644006add499804574b7646fd Mon Sep 17 00:00:00 2001
From: guido
Date: Fri, 29 May 2026 15:49:50 +0200
Subject: [PATCH] Fix RandomForest ignoring the max_depth hyperparameter

RandomForest.build_model read max_depth from the hyperparameters
(including the "None" -> None conversion) but never passed it to the
RandomForestRegressor constructor. As a result every forest was built
with sklearn's default max_depth=None regardless of the configured
value, and hyperparameter tuning over max_depth had no effect.

This also affects SingleDrugRandomForest and MultiViewRandomForest,
which inherit build_model.

Add a regression test asserting max_depth is forwarded to the
underlying RandomForestRegressor.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 drevalpy/models/baselines/sklearn_models.py |  3 ++-
 tests/models/test_baselines.py              | 26 +++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drevalpy/models/baselines/sklearn_models.py b/drevalpy/models/baselines/sklearn_models.py
index 0a00286f..42d91dd2 100644
--- a/drevalpy/models/baselines/sklearn_models.py
+++ b/drevalpy/models/baselines/sklearn_models.py
@@ -335,7 +335,7 @@ def build_model(self, hyperparameters: dict):
         Builds the model from hyperparameters.
 
         :param hyperparameters: Hyperparameters for the model. Contains n_estimators, criterion, max_samples,
-            and n_jobs.
+            max_depth and n_jobs.
""" super().build_model(hyperparameters) if self.hyperparameters["max_depth"] == "None": @@ -344,6 +344,7 @@ def build_model(self, hyperparameters: dict): n_estimators=self.hyperparameters["n_estimators"], criterion=self.hyperparameters["criterion"], max_samples=self.hyperparameters["max_samples"], + max_depth=self.hyperparameters["max_depth"], n_jobs=self.hyperparameters["n_jobs"], ) diff --git a/tests/models/test_baselines.py b/tests/models/test_baselines.py index 7168713d..410bb8e8 100644 --- a/tests/models/test_baselines.py +++ b/tests/models/test_baselines.py @@ -20,10 +20,34 @@ NaiveTissueDrugMeanPredictor, NaiveTissueMeanPredictor, ) -from drevalpy.models.baselines.sklearn_models import SklearnModel +from drevalpy.models.baselines.sklearn_models import RandomForest, SklearnModel from drevalpy.models.drp_model import DRPModel +@pytest.mark.parametrize("max_depth_input, expected", [(5, 5), (10, 10), (30, 30), ("None", None)]) +def test_random_forest_respects_max_depth(max_depth_input, expected) -> None: + """Ensure RandomForest forwards max_depth to the underlying RandomForestRegressor. + + Regression test: max_depth was read from the hyperparameters but never passed to the + RandomForestRegressor constructor, so every forest was built with the default max_depth=None + regardless of the configured value. + + :param max_depth_input: max_depth value as provided via the hyperparameters + :param expected: max_depth expected on the built sklearn model + """ + model = RandomForest() + model.build_model( + { + "n_estimators": 10, + "criterion": "squared_error", + "max_samples": 0.5, + "n_jobs": 1, + "max_depth": max_depth_input, + } + ) + assert model.model.max_depth == expected + + @pytest.mark.parametrize( "model_name", [