From 53e3175a41355b641a4ec3a6d68314c404f3f4c1 Mon Sep 17 00:00:00 2001 From: James Xiao Date: Tue, 26 May 2026 15:52:21 +0800 Subject: [PATCH 1/5] fix(core): use raw data-start for media elements in preview visibility loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For video and audio elements, data-start is authored in global (composition-root) time — the same contract used by the render pipeline's discoverMediaFromBrowser, which reads the raw attribute directly. Previously, the visibility loop called resolveStartForElement which adds the nearest ancestor composition's global start on top, causing a double-offset that kept pip-wired media permanently hidden when the host composition did not start at t=0. Example: a pip video with data-start="45.40" inside a host composition that also starts at data-start="45.40" resolved to 90.80, so the video was always hidden during its actual [45.40, 52.46] window. Non-media elements (divs, sections, etc.) continue to use the accumulating resolver because their data-start values are local to their composition. Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/runtime/init.test.ts | 62 ++++++++++++++++++++++++++ packages/core/src/runtime/init.ts | 10 ++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/packages/core/src/runtime/init.test.ts b/packages/core/src/runtime/init.test.ts index 3a3ffa780..4122095a9 100644 --- a/packages/core/src/runtime/init.test.ts +++ b/packages/core/src/runtime/init.test.ts @@ -477,6 +477,68 @@ describe("initSandboxRuntimeModular", () => { expect(hookHost.style.visibility).toBe("visible"); }); + it("shows pip video at global start time even when host composition starts late", () => { + // Regression: resolveStartForElement used to add the host composition's start on top of + // the video's own data-start, causing double-offset. A pip video with data-start="45.40" + // inside a host at data-start="45.40" would resolve to 90.80 and stay permanently hidden. + const root = document.createElement("div"); + root.setAttribute("data-composition-id", "main"); + root.setAttribute("data-root", "true"); + root.setAttribute("data-start", "0"); + root.setAttribute("data-width", "1920"); + root.setAttribute("data-height", "1080"); + document.body.appendChild(root); + + const host = document.createElement("div"); + host.setAttribute("data-composition-id", "scene-pip"); + host.setAttribute("data-start", "45.40"); + host.setAttribute("data-duration", "7.06"); + root.appendChild(host); + + const innerRoot = document.createElement("div"); + innerRoot.setAttribute("data-composition-id", "scene-pip"); + host.appendChild(innerRoot); + + // pip-wired video: data-start is authored in global time (same value as host) + const pipVideo = document.createElement("video"); + pipVideo.setAttribute("data-start", "45.40"); + pipVideo.setAttribute("data-duration", "7.06"); + Object.defineProperty(pipVideo, "paused", { value: true, configurable: true }); + Object.defineProperty(pipVideo, "readyState", { value: 0, configurable: true }); + Object.defineProperty(pipVideo, "currentTime", { + value: 0, + writable: true, + configurable: true, + }); + pipVideo.load = () => {}; + innerRoot.appendChild(pipVideo); + + (window as Window & { __timelines?: Record }).__timelines = { + main: createMockTimeline(60), + "scene-pip": createMockTimeline(7.06), + }; + + initSandboxRuntimeModular(); + + const player = ( + window as Window & { + __player?: { seek: (timeSeconds: number) => void }; + } + ).__player; + expect(player).toBeDefined(); + + // Before the fix: resolveStartForElement(pipVideo) = 45.40 + 45.40 = 90.80, so the + // video would be hidden at t=46 (90.80 > 46). After the fix: start = 45.40, visible. + player?.seek(46); + expect(pipVideo.style.visibility).toBe("visible"); + + player?.seek(53); + expect(pipVideo.style.visibility).toBe("hidden"); + + player?.seek(44); + expect(pipVideo.style.visibility).toBe("hidden"); + }); + it("plays scheduled child timelines without a captured root timeline when audio has failed", () => { const raf = createManualRaf(); vi.spyOn(performance, "now").mockImplementation(() => raf.now()); diff --git a/packages/core/src/runtime/init.ts b/packages/core/src/runtime/init.ts index 2357cb24f..ed7e65570 100644 --- a/packages/core/src/runtime/init.ts +++ b/packages/core/src/runtime/init.ts @@ -1329,7 +1329,15 @@ export function initSandboxRuntimeModular(): void { const tag = rawNode.tagName.toLowerCase(); if (tag === "script" || tag === "style" || tag === "link" || tag === "meta") continue; - const start = resolveStartForElement(rawNode, 0); + // For media elements (video/audio) data-start is authored in global (composition-root) + // time — the same contract used by the render pipeline's discoverMediaFromBrowser which + // reads the raw attribute directly. Calling resolveStartForElement would add the nearest + // ancestor composition's start a second time, creating a double-offset that keeps the + // element permanently hidden when its host composition does not start at t=0. + const isMediaElement = tag === "video" || tag === "audio"; + const start = isMediaElement + ? Math.max(0, Number(rawNode.getAttribute("data-start") ?? 0) || 0) + : resolveStartForElement(rawNode, 0); let duration = resolveDurationForElement(rawNode); const compId = rawNode.getAttribute("data-composition-id"); if (compId) { From 5272d62297bfe86e5abb1454617086287bba52e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel?= Date: Tue, 26 May 2026 16:53:11 +0000 Subject: [PATCH 2/5] fix(core): extend media start fix to all consumers, guard auto-start Narrow the raw data-start read to media elements without data-hf-auto-start (explicitly authored global coordinates). Elements with auto-injected data-start="0" remain composition-local via the resolver. Apply consistently across all three consumers: - visibility loop (init.ts) - refreshRuntimeMediaCache start/duration (init.ts) - resolveMediaWindowEndSeconds (timeline.ts) Add regression test for auto-injected data-start="0" inside a late-starting host to prove it doesn't regress. --- packages/core/src/runtime/init.test.ts | 57 ++++++++++++++++++++++++++ packages/core/src/runtime/init.ts | 23 +++++++---- packages/core/src/runtime/timeline.ts | 4 +- 3 files changed, 75 insertions(+), 9 deletions(-) diff --git a/packages/core/src/runtime/init.test.ts b/packages/core/src/runtime/init.test.ts index 4122095a9..12d712a19 100644 --- a/packages/core/src/runtime/init.test.ts +++ b/packages/core/src/runtime/init.test.ts @@ -539,6 +539,63 @@ describe("initSandboxRuntimeModular", () => { expect(pipVideo.style.visibility).toBe("hidden"); }); + it("shows auto-injected video at host time, not at t=0", () => { + const root = document.createElement("div"); + root.setAttribute("data-composition-id", "main"); + root.setAttribute("data-root", "true"); + root.setAttribute("data-start", "0"); + root.setAttribute("data-width", "1920"); + root.setAttribute("data-height", "1080"); + document.body.appendChild(root); + + const host = document.createElement("div"); + host.setAttribute("data-composition-id", "intro"); + host.setAttribute("data-start", "10"); + host.setAttribute("data-duration", "5"); + root.appendChild(host); + + const innerRoot = document.createElement("div"); + innerRoot.setAttribute("data-composition-id", "intro"); + host.appendChild(innerRoot); + + const video = document.createElement("video"); + video.setAttribute("data-start", "0"); + video.setAttribute("data-hf-auto-start", ""); + video.setAttribute("data-duration", "5"); + Object.defineProperty(video, "paused", { value: true, configurable: true }); + Object.defineProperty(video, "readyState", { value: 0, configurable: true }); + Object.defineProperty(video, "currentTime", { + value: 0, + writable: true, + configurable: true, + }); + video.load = () => {}; + innerRoot.appendChild(video); + + (window as Window & { __timelines?: Record }).__timelines = { + main: createMockTimeline(30), + intro: createMockTimeline(5), + }; + + initSandboxRuntimeModular(); + + const player = ( + window as Window & { + __player?: { seek: (timeSeconds: number) => void }; + } + ).__player; + expect(player).toBeDefined(); + + player?.seek(12); + expect(video.style.visibility).toBe("visible"); + + player?.seek(5); + expect(video.style.visibility).toBe("hidden"); + + player?.seek(16); + expect(video.style.visibility).toBe("hidden"); + }); + it("plays scheduled child timelines without a captured root timeline when audio has failed", () => { const raf = createManualRaf(); vi.spyOn(performance, "now").mockImplementation(() => raf.now()); diff --git a/packages/core/src/runtime/init.ts b/packages/core/src/runtime/init.ts index ed7e65570..1e32a71e9 100644 --- a/packages/core/src/runtime/init.ts +++ b/packages/core/src/runtime/init.ts @@ -1278,6 +1278,9 @@ export function initSandboxRuntimeModular(): void { element.hasAttribute("data-start") || Boolean(resolveMediaCompositionContext(element).compositionRoot), resolveStartSeconds: (element) => { + if (!element.hasAttribute("data-hf-auto-start") && element.hasAttribute("data-start")) { + return Math.max(0, Number(element.getAttribute("data-start") ?? 0) || 0); + } const context = resolveMediaCompositionContext( element as HTMLVideoElement | HTMLAudioElement, ); @@ -1285,7 +1288,10 @@ export function initSandboxRuntimeModular(): void { }, resolveDurationSeconds: (element) => { const context = resolveMediaCompositionContext(element); - const start = resolveStartForElement(element, context.inheritedStart ?? 0); + const start = + !element.hasAttribute("data-hf-auto-start") && element.hasAttribute("data-start") + ? Math.max(0, Number(element.getAttribute("data-start") ?? 0) || 0) + : resolveStartForElement(element, context.inheritedStart ?? 0); const mediaStart = Number.parseFloat(element.dataset.playbackStart ?? element.dataset.mediaStart ?? "0") || 0; @@ -1329,13 +1335,14 @@ export function initSandboxRuntimeModular(): void { const tag = rawNode.tagName.toLowerCase(); if (tag === "script" || tag === "style" || tag === "link" || tag === "meta") continue; - // For media elements (video/audio) data-start is authored in global (composition-root) - // time — the same contract used by the render pipeline's discoverMediaFromBrowser which - // reads the raw attribute directly. Calling resolveStartForElement would add the nearest - // ancestor composition's start a second time, creating a double-offset that keeps the - // element permanently hidden when its host composition does not start at t=0. - const isMediaElement = tag === "video" || tag === "audio"; - const start = isMediaElement + // Media elements with explicitly authored data-start (no data-hf-auto-start + // marker) use global coordinates — matching the render pipeline's + // discoverMediaFromBrowser. resolveStartForElement would add the host + // composition's offset a second time. Auto-injected data-start="0" + // (data-hf-auto-start present) is composition-local and needs the resolver. + const isGlobalMediaStart = + (tag === "video" || tag === "audio") && !rawNode.hasAttribute("data-hf-auto-start"); + const start = isGlobalMediaStart ? Math.max(0, Number(rawNode.getAttribute("data-start") ?? 0) || 0) : resolveStartForElement(rawNode, 0); let duration = resolveDurationForElement(rawNode); diff --git a/packages/core/src/runtime/timeline.ts b/packages/core/src/runtime/timeline.ts index 4d598fb04..1aa234ee3 100644 --- a/packages/core/src/runtime/timeline.ts +++ b/packages/core/src/runtime/timeline.ts @@ -220,7 +220,9 @@ export function collectRuntimeTimelinePayload(params: { if (mediaNodes.length === 0) return null; let maxWindowEndSeconds = 0; for (const mediaNode of mediaNodes) { - const start = startResolver.resolveStartForElement(mediaNode, 0); + const start = !mediaNode.hasAttribute("data-hf-auto-start") + ? Math.max(0, Number(mediaNode.getAttribute("data-start") ?? 0) || 0) + : startResolver.resolveStartForElement(mediaNode, 0); if (!Number.isFinite(start)) continue; const duration = resolveMediaElementDurationSeconds(mediaNode); if (duration == null || duration <= 0) continue; From a0b3c4202fbf1ee99adefb9241cfb9fa3d825906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel?= Date: Tue, 26 May 2026 17:35:47 +0000 Subject: [PATCH 3/5] test(producer): add pip-video-late-host regression test Renders a 6s composition with a pip video inside a sub-composition host starting at t=3. Verifies the video is visible during the host window and not double-offset to t=6. Baseline generated in Docker. --- .../tests/pip-video-late-host/meta.json | 12 + .../pip-video-late-host/output/compiled.html | 385 ++++++++++++++++++ .../pip-video-late-host/output/output.mp4 | 3 + .../tests/pip-video-late-host/src/index.html | 72 ++++ 4 files changed, 472 insertions(+) create mode 100644 packages/producer/tests/pip-video-late-host/meta.json create mode 100644 packages/producer/tests/pip-video-late-host/output/compiled.html create mode 100644 packages/producer/tests/pip-video-late-host/output/output.mp4 create mode 100644 packages/producer/tests/pip-video-late-host/src/index.html diff --git a/packages/producer/tests/pip-video-late-host/meta.json b/packages/producer/tests/pip-video-late-host/meta.json new file mode 100644 index 000000000..4b21ddfb3 --- /dev/null +++ b/packages/producer/tests/pip-video-late-host/meta.json @@ -0,0 +1,12 @@ +{ + "name": "Pip video inside late-starting sub-composition host", + "description": "Regression for: resolveStartForElement double-counts the host offset for media elements with explicitly authored data-start. A pip video at data-start='3' inside a host at data-start='3' resolved to 6.0 instead of 3.0, keeping the video permanently hidden during the host's window.", + "tags": ["video", "sub-composition", "pip", "regression"], + "minPsnr": 25, + "maxFrameFailures": 5, + "minAudioCorrelation": 0.0, + "maxAudioLagWindows": 120, + "renderConfig": { + "fps": 24 + } +} diff --git a/packages/producer/tests/pip-video-late-host/output/compiled.html b/packages/producer/tests/pip-video-late-host/output/compiled.html new file mode 100644 index 000000000..68d168d4f --- /dev/null +++ b/packages/producer/tests/pip-video-late-host/output/compiled.html @@ -0,0 +1,385 @@ + + + + + + + + + +
+ +
+
INTRO (0–3s)
+
+ + +
+ + + + +
PIP VIDEO (3–6s)
+ + + + +
+
+ + + + diff --git a/packages/producer/tests/pip-video-late-host/output/output.mp4 b/packages/producer/tests/pip-video-late-host/output/output.mp4 new file mode 100644 index 000000000..cbf8586b8 --- /dev/null +++ b/packages/producer/tests/pip-video-late-host/output/output.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f36c6d01c89e18934712248877fe29376f292c827d54df2622cb8c582bf4031 +size 4568335 diff --git a/packages/producer/tests/pip-video-late-host/src/index.html b/packages/producer/tests/pip-video-late-host/src/index.html new file mode 100644 index 000000000..b74661c79 --- /dev/null +++ b/packages/producer/tests/pip-video-late-host/src/index.html @@ -0,0 +1,72 @@ + + + + + + + + + +
+ +
+
INTRO (0–3s)
+
+ + +
+
+ + + + From 5e25c15516c990708ad5acf1832f28fa1cbb7030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel?= Date: Tue, 26 May 2026 17:37:01 +0000 Subject: [PATCH 4/5] fix: include pip sub-composition source file --- .../src/compositions/pip.html | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 packages/producer/tests/pip-video-late-host/src/compositions/pip.html diff --git a/packages/producer/tests/pip-video-late-host/src/compositions/pip.html b/packages/producer/tests/pip-video-late-host/src/compositions/pip.html new file mode 100644 index 000000000..9a4be1721 --- /dev/null +++ b/packages/producer/tests/pip-video-late-host/src/compositions/pip.html @@ -0,0 +1,63 @@ + + From ca39687789f52a5a38ec9152f951c9af695aaed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel?= Date: Tue, 26 May 2026 18:02:23 +0000 Subject: [PATCH 5/5] fix(core): patch resolveMediaWindowDurationSeconds + extract helper Fix the 4th unguarded resolveStartForElement call site in resolveMediaWindowDurationSeconds that inflated the timeline duration floor for pip compositions. Extract resolveMediaStartSeconds helper to consolidate the data-hf-auto-start guard across all call sites. --- packages/core/src/runtime/init.ts | 34 ++++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/packages/core/src/runtime/init.ts b/packages/core/src/runtime/init.ts index 1e32a71e9..4b3b9b4f2 100644 --- a/packages/core/src/runtime/init.ts +++ b/packages/core/src/runtime/init.ts @@ -387,6 +387,14 @@ export function initSandboxRuntimeModular(): void { }); return resolver.resolveDurationForElement(element); }; + + const resolveMediaStartSeconds = (element: Element, fallback = 0): number => { + if (!element.hasAttribute("data-hf-auto-start") && element.hasAttribute("data-start")) { + return Math.max(0, Number(element.getAttribute("data-start") ?? 0) || 0); + } + return resolveStartForElement(element, fallback); + }; + const hasExternalCompositions = !!document.querySelector("[data-composition-src]"); let hasInlineTemplateCompositions = false; { @@ -456,7 +464,7 @@ export function initSandboxRuntimeModular(): void { if (mediaNodes.length === 0) return null; let maxWindowEndSeconds = 0; for (const node of mediaNodes) { - const start = resolveStartForElement(node, 0); + const start = resolveMediaStartSeconds(node, 0); if (!Number.isFinite(start)) continue; const duration = resolveMediaElementDurationSeconds(node); if (duration == null || duration <= MIN_VALID_TIMELINE_DURATION_SECONDS) continue; @@ -1278,20 +1286,14 @@ export function initSandboxRuntimeModular(): void { element.hasAttribute("data-start") || Boolean(resolveMediaCompositionContext(element).compositionRoot), resolveStartSeconds: (element) => { - if (!element.hasAttribute("data-hf-auto-start") && element.hasAttribute("data-start")) { - return Math.max(0, Number(element.getAttribute("data-start") ?? 0) || 0); - } const context = resolveMediaCompositionContext( element as HTMLVideoElement | HTMLAudioElement, ); - return resolveStartForElement(element, context.inheritedStart ?? 0); + return resolveMediaStartSeconds(element, context.inheritedStart ?? 0); }, resolveDurationSeconds: (element) => { const context = resolveMediaCompositionContext(element); - const start = - !element.hasAttribute("data-hf-auto-start") && element.hasAttribute("data-start") - ? Math.max(0, Number(element.getAttribute("data-start") ?? 0) || 0) - : resolveStartForElement(element, context.inheritedStart ?? 0); + const start = resolveMediaStartSeconds(element, context.inheritedStart ?? 0); const mediaStart = Number.parseFloat(element.dataset.playbackStart ?? element.dataset.mediaStart ?? "0") || 0; @@ -1335,16 +1337,10 @@ export function initSandboxRuntimeModular(): void { const tag = rawNode.tagName.toLowerCase(); if (tag === "script" || tag === "style" || tag === "link" || tag === "meta") continue; - // Media elements with explicitly authored data-start (no data-hf-auto-start - // marker) use global coordinates — matching the render pipeline's - // discoverMediaFromBrowser. resolveStartForElement would add the host - // composition's offset a second time. Auto-injected data-start="0" - // (data-hf-auto-start present) is composition-local and needs the resolver. - const isGlobalMediaStart = - (tag === "video" || tag === "audio") && !rawNode.hasAttribute("data-hf-auto-start"); - const start = isGlobalMediaStart - ? Math.max(0, Number(rawNode.getAttribute("data-start") ?? 0) || 0) - : resolveStartForElement(rawNode, 0); + const start = + tag === "video" || tag === "audio" + ? resolveMediaStartSeconds(rawNode, 0) + : resolveStartForElement(rawNode, 0); let duration = resolveDurationForElement(rawNode); const compId = rawNode.getAttribute("data-composition-id"); if (compId) {