Add `boundaries` option ("exact" / "trim") to resample

Giacomo Caria · Giacomo Caria · commit 71bbab09e6ef · 2025-10-17T17:34:45.000-03:00
diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -6,7 +6,16 @@
 from contextlib import suppress
 from html import escape
 from textwrap import dedent
-from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, Union, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Concatenate,
+    Literal,
+    ParamSpec,
+    TypeVar,
+    Union,
+    overload,
+)
 
 import numpy as np
 import pandas as pd
@@ -925,6 +934,7 @@ def _resample(
         offset: pd.Timedelta | datetime.timedelta | str | None,
         origin: str | DatetimeLike,
         restore_coord_dims: bool | None,
+        boundaries: Literal["exact", "trim"] | None = None,
         **indexer_kwargs: ResampleCompatible | Resampler,
     ) -> T_Resample:
         """Returns a Resample object for performing resampling operations.
@@ -960,6 +970,11 @@ def _resample(
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
+        boundaries : {"exact", "trim"}, optional
+            How to handle data that doesn't evenly fit the resampling frequency.
+            If 'exact', a ValueError is raised. If 'trim', incomplete periods are
+            dropped. If None (default), incomplete periods are kept. Ignored when
+            a ``Resampler`` object is passed instead of a frequency.
         **indexer_kwargs : {dim: freq}
             The keyword arguments form of ``indexer``.
             One of indexer or indexer_kwargs must be provided.
@@ -1107,8 +1122,45 @@ def _resample(
         grouper: Resampler
         if isinstance(freq, ResampleCompatible):
             grouper = TimeResampler(
-                freq=freq, closed=closed, label=label, origin=origin, offset=offset
+                freq=freq,
+                closed=closed,
+                label=label,
+                origin=origin,
+                offset=offset,
+                boundaries=boundaries,
             )
+
+            # Apply trim logic at the resample level if needed
+            if boundaries == "trim":
+                # First, get the resampling periods to identify incomplete ones
+                from xarray.core.groupby import ResolvedGrouper
+
+                temp_grouper = ResolvedGrouper(grouper, group, self)
+                temp_encoded = temp_grouper.encoded
+
+                # Count data points in each period
+                codes = temp_encoded.codes
+                counts = np.bincount(codes.values)
+
+                if len(counts) > 0:
+                    # Find the most common count (expected points per period)
+                    unique_counts, count_frequencies = np.unique(
+                        counts, return_counts=True
+                    )
+                    most_common_count = unique_counts[np.argmax(count_frequencies)]
+
+                    # Identify incomplete periods
+                    incomplete_periods = counts < most_common_count
+
+                    if np.any(incomplete_periods):
+                        # Find which data points belong to incomplete periods
+                        incomplete_codes = np.where(incomplete_periods)[0]
+                        valid_mask = ~np.isin(codes.values, incomplete_codes)
+
+                        # Filter the data to exclude incomplete periods
+                        group = group.isel({group.dims[0]: valid_mask})
+                        # Also update the object to match the filtered group
+                        self = self.isel({group.dims[0]: valid_mask})
         elif isinstance(freq, Resampler):
             grouper = freq
         else:
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -7433,6 +7433,7 @@ def resample(
         offset: pd.Timedelta | datetime.timedelta | str | None = None,
         origin: str | DatetimeLike = "start_day",
         restore_coord_dims: bool | None = None,
+        boundaries: Literal["exact", "trim"] | None = None,
         **indexer_kwargs: ResampleCompatible | Resampler,
     ) -> DataArrayResample:
         """Returns a Resample object for performing resampling operations.
@@ -7468,6 +7469,11 @@ def resample(
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
+        boundaries : {"exact", "trim"}, optional
+            How to handle data that doesn't evenly fit the resampling frequency.
+            If 'exact', a ValueError is raised. If 'trim', incomplete periods are
+            dropped. If None (default), incomplete periods are kept. Ignored when
+            a ``Resampler`` object is passed instead of a frequency.
         **indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
             The keyword arguments form of ``indexer``.
             One of indexer or indexer_kwargs must be provided.
@@ -7572,6 +7578,7 @@ def resample(
             offset=offset,
             origin=origin,
             restore_coord_dims=restore_coord_dims,
+            boundaries=boundaries,
             **indexer_kwargs,
         )
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -10371,6 +10371,7 @@ def resample(
         offset: pd.Timedelta | datetime.timedelta | str | None = None,
         origin: str | DatetimeLike = "start_day",
         restore_coord_dims: bool | None = None,
+        boundaries: Literal["exact", "trim"] | None = None,
         **indexer_kwargs: ResampleCompatible | Resampler,
     ) -> DatasetResample:
         """Returns a Resample object for performing resampling operations.
@@ -10406,6 +10407,11 @@ def resample(
         restore_coord_dims : bool, optional
             If True, also restore the dimension order of multi-dimensional
             coordinates.
+        boundaries : {"exact", "trim"}, optional
+            How to handle data that doesn't evenly fit the resampling frequency.
+            If 'exact', a ValueError is raised. If 'trim', incomplete periods are
+            dropped. If None (default), incomplete periods are kept. Ignored when
+            a ``Resampler`` object is passed instead of a frequency.
         **indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
             The keyword arguments form of ``indexer``.
             One of indexer or indexer_kwargs must be provided.
@@ -10438,6 +10444,7 @@ def resample(
             offset=offset,
             origin=origin,
             restore_coord_dims=restore_coord_dims,
+            boundaries=boundaries,
             **indexer_kwargs,
         )
 
diff --git a/xarray/groupers.py b/xarray/groupers.py
@@ -484,13 +484,19 @@ class TimeResampler(Resampler):
         - 'end_day': `origin` is the ceiling midnight of the last day
     offset : pd.Timedelta, datetime.timedelta, or str, default is None
         An offset timedelta added to the origin.
+    boundaries : {"exact", "trim"}, optional
+        How to handle data that doesn't evenly fit the resampling frequency.
+        If 'exact', ``factorize`` raises a ValueError on incomplete periods.
+        'trim' (drop incomplete periods) is applied by ``.resample()``, not by
+        this grouper. If None (default), incomplete periods are kept.
     """
 
     freq: ResampleCompatible
     closed: SideOptions | None = field(default=None)
     label: SideOptions | None = field(default=None)
     origin: str | DatetimeLike = field(default="start_day")
     offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
+    boundaries: Literal["exact", "trim"] | None = field(default=None, kw_only=True)
 
     index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False)
     group_as_index: pd.Index = field(init=False, repr=False)
@@ -502,6 +508,7 @@ def reset(self) -> Self:
             label=self.label,
             origin=self.origin,
             offset=self.offset,
+            boundaries=self.boundaries,
         )
 
     def _init_properties(self, group: T_Group) -> None:
@@ -566,6 +573,22 @@ def factorize(self, group: T_Group) -> EncodedGroups:
         self._init_properties(group)
         full_index, first_items, codes_ = self._get_index_and_items()
         sbins = first_items.values.astype(np.int64)
+
+        # Handle boundaries parameter for exact checking
+        if self.boundaries == "exact":
+            # Check if data evenly fits the resampling frequency
+            counts = np.bincount(codes_)
+            expected_points = len(group) // len(first_items)
+            incomplete_periods = counts < expected_points
+
+            if np.any(incomplete_periods):
+                raise ValueError(
+                    f"Data does not evenly fit the resampling frequency. "
+                    f"Expected {expected_points} points per period, but found periods with "
+                    f"{counts[incomplete_periods]} points. Use boundaries='trim' "
+                    f"to handle incomplete periods."
+                )
+
         group_indices: GroupIndices = tuple(
             list(itertools.starmap(slice, pairwise(sbins))) + [slice(sbins[-1], None)]
         )
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
@@ -2081,6 +2081,24 @@ def test_resample_skipna(self) -> None:
         expected = DataArray([np.nan, 1, 1], [("time", times[::4])])
         assert_identical(result, expected)
 
+    def test_resample_boundaries(self) -> None:
+        """Check that ``boundaries`` trims or rejects incomplete resample periods."""
+        # 31 daily values (0-30): four full 7-day bins plus a 3-day remainder
+        times = pd.date_range("2000-01-01", periods=31, freq="D")
+        array = DataArray(np.arange(31), [("time", times)])
+
+        # boundaries="trim": remainder dropped; means of 0-6, 7-13, 14-20, 21-27
+        result_trim = array.resample(time="7D", boundaries="trim").mean()
+        assert len(result_trim.time) == 4
+        expected_trim = np.array([3.0, 10.0, 17.0, 24.0])
+        np.testing.assert_array_equal(result_trim.values, expected_trim)
+
+        # boundaries="exact": the 3-day remainder must make the resample raise
+        with pytest.raises(
+            ValueError, match="Data does not evenly fit the resampling frequency"
+        ):
+            array.resample(time="7D", boundaries="exact").mean()
+
     def test_upsample(self) -> None:
         times = pd.date_range("2000-01-01", freq="6h", periods=5)
         array = DataArray(np.arange(5), [("time", times)])