|
6 | 6 | from contextlib import suppress |
7 | 7 | from html import escape |
8 | 8 | from textwrap import dedent |
9 | | -from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, Union, overload |
| 9 | +from typing import ( |
| 10 | + TYPE_CHECKING, |
| 11 | + Any, |
| 12 | + Concatenate, |
| 13 | + Literal, |
| 14 | + ParamSpec, |
| 15 | + TypeVar, |
| 16 | + Union, |
| 17 | + overload, |
| 18 | +) |
10 | 19 |
|
11 | 20 | import numpy as np |
12 | 21 | import pandas as pd |
@@ -925,6 +934,7 @@ def _resample( |
925 | 934 | offset: pd.Timedelta | datetime.timedelta | str | None, |
926 | 935 | origin: str | DatetimeLike, |
927 | 936 | restore_coord_dims: bool | None, |
| 937 | + boundaries: Literal["exact", "trim"] | None = None, |
928 | 938 | **indexer_kwargs: ResampleCompatible | Resampler, |
929 | 939 | ) -> T_Resample: |
930 | 940 | """Returns a Resample object for performing resampling operations. |
@@ -960,6 +970,11 @@ def _resample( |
960 | 970 | restore_coord_dims : bool, optional |
961 | 971 | If True, also restore the dimension order of multi-dimensional |
962 | 972 | coordinates. |
| 973 | + boundaries : {"exact", "trim"}, optional |
| 974 | + How to handle boundaries when the data doesn't evenly fit the resampling |
| 975 | + frequency. If 'exact', a ValueError will be raised if the data doesn't |
| 976 | + evenly fit. If 'trim', incomplete periods are dropped. If None (default), |
| 977 | + uses the current behavior (includes incomplete periods). |
963 | 978 | **indexer_kwargs : {dim: freq} |
964 | 979 | The keyword arguments form of ``indexer``. |
965 | 980 | One of indexer or indexer_kwargs must be provided. |
@@ -1107,8 +1122,45 @@ def _resample( |
1107 | 1122 | grouper: Resampler |
1108 | 1123 | if isinstance(freq, ResampleCompatible): |
1109 | 1124 | grouper = TimeResampler( |
1110 | | - freq=freq, closed=closed, label=label, origin=origin, offset=offset |
| 1125 | + freq=freq, |
| 1126 | + closed=closed, |
| 1127 | + label=label, |
| 1128 | + origin=origin, |
| 1129 | + offset=offset, |
| 1130 | + boundaries=boundaries, |
1111 | 1131 | ) |
| 1132 | + |
| 1133 | + # Apply trim logic at the resample level if needed |
| 1134 | + if boundaries == "trim": |
| 1135 | + # First, get the resampling periods to identify incomplete ones |
| 1136 | + from xarray.core.groupby import ResolvedGrouper |
| 1137 | + |
| 1138 | + temp_grouper = ResolvedGrouper(grouper, group, self) |
| 1139 | + temp_encoded = temp_grouper.encoded |
| 1140 | + |
| 1141 | + # Count data points in each period |
| 1142 | + codes = temp_encoded.codes |
| 1143 | + counts = np.bincount(codes.values) |
| 1144 | + |
| 1145 | + if len(counts) > 0: |
| 1146 | + # Find the most common count (expected points per period) |
| 1147 | + unique_counts, count_frequencies = np.unique( |
| 1148 | + counts, return_counts=True |
| 1149 | + ) |
| 1150 | + most_common_count = unique_counts[np.argmax(count_frequencies)] |
| 1151 | + |
| 1152 | + # Identify incomplete periods |
| 1153 | + incomplete_periods = counts < most_common_count |
| 1154 | + |
| 1155 | + if np.any(incomplete_periods): |
| 1156 | + # Find which data points belong to incomplete periods |
| 1157 | + incomplete_codes = np.where(incomplete_periods)[0] |
| 1158 | + valid_mask = ~np.isin(codes.values, incomplete_codes) |
| 1159 | + |
| 1160 | + # Filter the data to exclude incomplete periods |
| 1161 | + group = group.isel({group.dims[0]: valid_mask}) |
| 1162 | + # Also update the object to match the filtered group |
| 1163 | + self = self.isel({group.dims[0]: valid_mask}) |
1112 | 1164 | elif isinstance(freq, Resampler): |
1113 | 1165 | grouper = freq |
1114 | 1166 | else: |
|
0 commit comments