python · pablogsal · Dec 7, 2025 · Dec 8, 2025 · Dec 9, 2025
diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py
@@ -477,6 +477,10 @@ def __init__(self, *args, **kwargs):
         # File index (populated during export)
         self.file_index = {}
 
+        # Reusable set for deduplicating line locations within a single sample.
+        # This avoids over-counting recursive functions in cumulative stats.
+        self._seen_lines = set()
+
     def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
         """Set profiling statistics to include in heatmap output.
 
@@ -509,6 +513,7 @@ def process_frames(self, frames, thread_id):
             thread_id: Thread ID for this stack trace
         """
         self._total_samples += 1
+        self._seen_lines.clear()
 
         # Count each line in the stack and build call graph
         for i, frame_info in enumerate(frames):
@@ -519,7 +524,13 @@ def process_frames(self, frames, thread_id):
 
             # frames[0] is the leaf - where execution is actually happening
             is_leaf = (i == 0)
-            self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf)
+            line_key = (filename, lineno)
+            count_cumulative = line_key not in self._seen_lines
+            if count_cumulative:
+                self._seen_lines.add(line_key)
+
+            self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
+                                     count_cumulative=count_cumulative)
 
             # Build call graph for adjacent frames
             if i + 1 < len(frames):
@@ -537,11 +548,13 @@ def _is_valid_frame(self, filename, lineno):
 
         return True
 
-    def _record_line_sample(self, filename, lineno, funcname, is_leaf=False):
+    def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
+                            count_cumulative=True):
         """Record a sample for a specific line."""
         # Track cumulative samples (all occurrences in stack)
-        self.line_samples[(filename, lineno)] += 1
-        self.file_samples[filename][lineno] += 1
+        if count_cumulative:
+            self.line_samples[(filename, lineno)] += 1
+            self.file_samples[filename][lineno] += 1
 
         # Track self/leaf samples (only when at top of stack)
         if is_leaf:

diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py
@@ -190,6 +190,8 @@ def __init__(
         # Trend tracking (initialized after colors are set up)
         self._trend_tracker = None
 
+        self._seen_locations = set()
+
     @property
     def elapsed_time(self):
         """Get the elapsed time, frozen when finished."""
@@ -285,13 +287,16 @@ def process_frames(self, frames, thread_id=None):
 
         # Get per-thread data if tracking per-thread
         thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None
+        self._seen_locations.clear()
 
         # Process each frame in the stack to track cumulative calls
         for frame in frames:
             location = (frame.filename, frame.lineno, frame.funcname)
-            self.result[location]["cumulative_calls"] += 1
-            if thread_data:
-                thread_data.result[location]["cumulative_calls"] += 1
+            if location not in self._seen_locations:
+                self._seen_locations.add(location)
+                self.result[location]["cumulative_calls"] += 1
+                if thread_data:
+                    thread_data.result[location]["cumulative_calls"] += 1
 
         # The top frame gets counted as an inline call (directly executing)
         top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)

diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py
@@ -16,16 +16,21 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
             lambda: collections.defaultdict(int)
         )
         self.skip_idle = skip_idle
+        self._seen_locations = set()
 
     def _process_frames(self, frames):
         """Process a single thread's frame stack."""
         if not frames:
             return
 
+        self._seen_locations.clear()
+
         # Process each frame in the stack to track cumulative calls
         for frame in frames:
             location = (frame.filename, frame.lineno, frame.funcname)
-            self.result[location]["cumulative_calls"] += 1
+            if location not in self._seen_locations:
+                self._seen_locations.add(location)
+                self.result[location]["cumulative_calls"] += 1
 
         # The top frame gets counted as an inline call (directly executing)
         top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -85,7 +85,7 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self):
         # Should still process the frames
         self.assertEqual(len(collector.result), 1)
 
-        # Test collecting duplicate frames in same sample
+        # Test collecting duplicate frames in same sample (recursive function)
         test_frames = [
             MockInterpreterInfo(
                 0,  # interpreter_id
@@ -94,17 +94,17 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self):
                         1,
                         [
                             MockFrameInfo("file.py", 10, "func1"),
-                            MockFrameInfo("file.py", 10, "func1"),  # Duplicate
+                            MockFrameInfo("file.py", 10, "func1"),  # Duplicate (recursion)
                         ],
                     )
                 ],
             )
         ]
         collector = PstatsCollector(sample_interval_usec=1000)
         collector.collect(test_frames)
-        # Should count both occurrences
+        # Should count only once per sample to avoid over-counting recursive functions
         self.assertEqual(
-            collector.result[("file.py", 10, "func1")]["cumulative_calls"], 2
+            collector.result[("file.py", 10, "func1")]["cumulative_calls"], 1
         )
 
     def test_pstats_collector_single_frame_stacks(self):
@@ -1201,3 +1201,194 @@ def test_flamegraph_collector_per_thread_gc_percentage(self):
         self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1)
         self.assertEqual(collector.per_thread_stats[2]["total"], 6)
         self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1)
+
+
+class TestRecursiveFunctionHandling(unittest.TestCase):
+    """Tests for correct handling of recursive functions in cumulative stats."""
+
+    def test_pstats_collector_recursive_function_single_sample(self):
+        """Test that recursive functions are counted once per sample, not per occurrence."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        # Simulate a recursive function appearing 5 times in one sample
+        recursive_frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                        ],
+                    )
+                ],
+            )
+        ]
+        collector.collect(recursive_frames)
+
+        location = ("test.py", 10, "recursive_func")
+        # Should count as 1 cumulative call (present in 1 sample), not 5
+        self.assertEqual(collector.result[location]["cumulative_calls"], 1)
+        # Direct calls should be 1 (top of stack)
+        self.assertEqual(collector.result[location]["direct_calls"], 1)
+
+    def test_pstats_collector_recursive_function_multiple_samples(self):
+        """Test cumulative counting across multiple samples with recursion."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        # Sample 1: recursive function at depth 3
+        sample1 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                        ],
+                    )
+                ],
+            )
+        ]
+        # Sample 2: recursive function at depth 2
+        sample2 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                        ],
+                    )
+                ],
+            )
+        ]
+        # Sample 3: recursive function at depth 4
+        sample3 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                            MockFrameInfo("test.py", 10, "recursive_func"),
+                        ],
+                    )
+                ],
+            )
+        ]
+
+        collector.collect(sample1)
+        collector.collect(sample2)
+        collector.collect(sample3)
+
+        location = ("test.py", 10, "recursive_func")
+        # Should count as 3 cumulative calls (present in 3 samples)
+        # Not 3+2+4=9 which would be the buggy behavior
+        self.assertEqual(collector.result[location]["cumulative_calls"], 3)
+        self.assertEqual(collector.result[location]["direct_calls"], 3)
+
+    def test_pstats_collector_mixed_recursive_and_nonrecursive(self):
+        """Test a call stack with both recursive and non-recursive functions."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        # Stack: main -> foo (recursive x3) -> bar
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 50, "bar"),       # top of stack
+                            MockFrameInfo("test.py", 20, "foo"),      # recursive
+                            MockFrameInfo("test.py", 20, "foo"),      # recursive
+                            MockFrameInfo("test.py", 20, "foo"),      # recursive
+                            MockFrameInfo("test.py", 10, "main"),     # bottom
+                        ],
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        # bar: 1 cumulative (in stack), 1 direct (top)
+        self.assertEqual(collector.result[("test.py", 50, "bar")]["cumulative_calls"], 1)
+        self.assertEqual(collector.result[("test.py", 50, "bar")]["direct_calls"], 1)
+
+        # foo: 1 cumulative (counted once despite 3 occurrences), 0 direct
+        self.assertEqual(collector.result[("test.py", 20, "foo")]["cumulative_calls"], 1)
+        self.assertEqual(collector.result[("test.py", 20, "foo")]["direct_calls"], 0)
+
+        # main: 1 cumulative, 0 direct
+        self.assertEqual(collector.result[("test.py", 10, "main")]["cumulative_calls"], 1)
+        self.assertEqual(collector.result[("test.py", 10, "main")]["direct_calls"], 0)
+
+    def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
+        """Test that cumulative percentage stays <= 100% even with deep recursion."""
+        collector = PstatsCollector(sample_interval_usec=1000000)  # 1 second for easy math
+
+        # Collect 10 samples, each with recursive function at depth 100
+        for _ in range(10):
+            frames = [
+                MockInterpreterInfo(
+                    0,
+                    [
+                        MockThreadInfo(
+                            1,
+                            [MockFrameInfo("test.py", 10, "deep_recursive")] * 100,
+                        )
+                    ],
+                )
+            ]
+            collector.collect(frames)
+
+        location = ("test.py", 10, "deep_recursive")
+        # Cumulative calls should be 10 (number of samples), not 1000
+        self.assertEqual(collector.result[location]["cumulative_calls"], 10)
+
+        # Verify stats calculation gives correct percentage
+        collector.create_stats()
+        stats = collector.stats[location]
+        # stats format: (direct_calls, cumulative_calls, total_time, cumulative_time, callers)
+        cumulative_calls = stats[1]
+        self.assertEqual(cumulative_calls, 10)
+
+    def test_pstats_collector_different_lines_same_function_counted_separately(self):
+        """Test that different line numbers in same function are tracked separately."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        # Function with multiple line numbers (e.g., different call sites within recursion)
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("test.py", 15, "func"),  # line 15
+                            MockFrameInfo("test.py", 12, "func"),  # line 12
+                            MockFrameInfo("test.py", 15, "func"),  # line 15 again
+                            MockFrameInfo("test.py", 10, "func"),  # line 10
+                        ],
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        # Each unique (file, line, func) should be counted once
+        self.assertEqual(collector.result[("test.py", 15, "func")]["cumulative_calls"], 1)
+        self.assertEqual(collector.result[("test.py", 12, "func")]["cumulative_calls"], 1)
+        self.assertEqual(collector.result[("test.py", 10, "func")]["cumulative_calls"], 1)
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
@@ -121,16 +121,17 @@ def test_recursive_function_call_counting(self):
         self.assertIn(fib_key, collector.stats)
         self.assertIn(main_key, collector.stats)
 
-        # Fibonacci should have many calls due to recursion
+        # Fibonacci: counted once per sample, not per occurrence
         fib_stats = collector.stats[fib_key]
         direct_calls, cumulative_calls, tt, ct, callers = fib_stats
 
-        # Should have recorded multiple calls (9 total appearances in samples)
-        self.assertEqual(cumulative_calls, 9)
-        self.assertGreater(tt, 0)  # Should have some total time
-        self.assertGreater(ct, 0)  # Should have some cumulative time
+        # Should count 3 (present in 3 samples), not 9 (total occurrences)
+        self.assertEqual(cumulative_calls, 3)
+        self.assertEqual(direct_calls, 3)  # Top of stack in all samples
+        self.assertGreater(tt, 0)
+        self.assertGreater(ct, 0)
 
-        # Main should have fewer calls
+        # Main should also have 3 cumulative calls (in all 3 samples)
         main_stats = collector.stats[main_key]
         main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1]
         self.assertEqual(main_direct_calls, 0)  # Never directly executing