Skip to content

Commit 33a4d41

Browse files
author
Giacomo Caria
committed
move trim logic to TimeResampler.factorize
1 parent 71bbab0 commit 33a4d41

File tree

2 files changed

+18
-33
lines changed

2 files changed

+18
-33
lines changed

xarray/core/common.py

Lines changed: 0 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1129,38 +1129,6 @@ def _resample(
11291129
offset=offset,
11301130
boundaries=boundaries,
11311131
)
1132-
1133-
# Apply trim logic at the resample level if needed
1134-
if boundaries == "trim":
1135-
# First, get the resampling periods to identify incomplete ones
1136-
from xarray.core.groupby import ResolvedGrouper
1137-
1138-
temp_grouper = ResolvedGrouper(grouper, group, self)
1139-
temp_encoded = temp_grouper.encoded
1140-
1141-
# Count data points in each period
1142-
codes = temp_encoded.codes
1143-
counts = np.bincount(codes.values)
1144-
1145-
if len(counts) > 0:
1146-
# Find the most common count (expected points per period)
1147-
unique_counts, count_frequencies = np.unique(
1148-
counts, return_counts=True
1149-
)
1150-
most_common_count = unique_counts[np.argmax(count_frequencies)]
1151-
1152-
# Identify incomplete periods
1153-
incomplete_periods = counts < most_common_count
1154-
1155-
if np.any(incomplete_periods):
1156-
# Find which data points belong to incomplete periods
1157-
incomplete_codes = np.where(incomplete_periods)[0]
1158-
valid_mask = ~np.isin(codes.values, incomplete_codes)
1159-
1160-
# Filter the data to exclude incomplete periods
1161-
group = group.isel({group.dims[0]: valid_mask})
1162-
# Also update the object to match the filtered group
1163-
self = self.isel({group.dims[0]: valid_mask})
11641132
elif isinstance(freq, Resampler):
11651133
grouper = freq
11661134
else:

xarray/groupers.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -574,7 +574,7 @@ def factorize(self, group: T_Group) -> EncodedGroups:
574574
full_index, first_items, codes_ = self._get_index_and_items()
575575
sbins = first_items.values.astype(np.int64)
576576

577-
# Handle boundaries parameter for exact checking
577+
# Handle boundaries parameter for exact checking and trim logic
578578
if self.boundaries == "exact":
579579
# Check if data evenly fits the resampling frequency
580580
counts = np.bincount(codes_)
@@ -588,6 +588,23 @@ def factorize(self, group: T_Group) -> EncodedGroups:
588588
f"{counts[incomplete_periods]} points. Use boundaries='trim' "
589589
f"to handle incomplete periods."
590590
)
591+
elif self.boundaries == "trim":
592+
# Apply trim logic: set codes to -1 for incomplete periods
593+
counts = np.bincount(codes_)
594+
595+
if len(counts) > 0:
596+
# Find the most common count (expected points per period)
597+
unique_counts, count_frequencies = np.unique(counts, return_counts=True)
598+
most_common_count = unique_counts[np.argmax(count_frequencies)]
599+
600+
# Identify incomplete periods
601+
incomplete_periods = counts < most_common_count
602+
603+
if np.any(incomplete_periods):
604+
# Find which data points belong to incomplete periods
605+
incomplete_codes = np.where(incomplete_periods)[0]
606+
# Set codes to -1 for points in incomplete periods
607+
codes_[np.isin(codes_, incomplete_codes)] = -1
591608

592609
group_indices: GroupIndices = tuple(
593610
list(itertools.starmap(slice, pairwise(sbins))) + [slice(sbins[-1], None)]

0 commit comments

Comments
 (0)