From 409a8a6f848f9934ee9c91409f8741cc7e47eb75 Mon Sep 17 00:00:00 2001 From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:13:01 +0300 Subject: [PATCH 1/3] improve HTML paste handling with sanitization --- .../composables/useToolbarActions.js | 62 ++++++++++------- .../TipTapEditor/utils/markdown.js | 67 ++++++++++++++++++- 2 files changed, 104 insertions(+), 25 deletions(-) diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js index 2745491656..573381fa8d 100644 --- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js +++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js @@ -1,5 +1,6 @@ import { computed, inject } from 'vue'; import { getTipTapEditorStrings } from '../TipTapEditorStrings'; +import { sanitizePastedHTML } from '../utils/markdown'; export function useToolbarActions(emit) { const editor = inject('editor', null); @@ -135,35 +136,50 @@ export function useToolbarActions(emit) { }; const handlePaste = async () => { - if (editor.value) { - try { - // Try HTML first - const clipboardData = await navigator.clipboard.read(); - const htmlType = clipboardData[0].types.find(type => type === 'text/html'); - - if (htmlType) { - const htmlBlob = await clipboardData[0].getType('text/html'); - const html = await htmlBlob.text(); - editor.value.chain().focus().insertContent(html).run(); - } else { - // Fall back to plain text - handlePasteNoFormat(); + if (!editor.value) return; + + try { + if (navigator.clipboard?.read) { + const items = await navigator.clipboard.read(); + + for (const item of items) { + if (item.types.includes('text/html')) { + const htmlBlob = await item.getType('text/html'); + const html = await 
htmlBlob.text(); + const cleaned = sanitizePastedHTML(html); + + editor.value.chain().focus().insertContent(cleaned).run(); + return; + } + if (item.types.includes('text/plain')) { + const textBlob = await item.getType('text/plain'); + const text = await textBlob.text(); + + editor.value.chain().focus().insertContent(text).run(); + return; + } } - } catch (err) { - editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run(); + return handlePasteNoFormat(); } + return handlePasteNoFormat(); + } catch (err) { + editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run(); } }; const handlePasteNoFormat = async () => { - if (editor.value) { - try { - // Read plain text from clipboard - const text = await navigator.clipboard.readText(); - editor.value.chain().focus().insertContent(text).run(); - } catch (err) { - editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run(); - } + if (!editor.value) return; + + try { + const text = await navigator.clipboard.readText(); + if (!text) return; + + // Note: Generated this regex with the help of an LLM. 
+ const normalized = text.replace(/\r\n/g, '\n'); + + editor.value.chain().focus().insertContent(normalized).run(); + } catch (err) { + editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run(); } }; diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js index 26a1bd5c71..8d4ec4aaab 100644 --- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js +++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js @@ -51,6 +51,67 @@ export const paramsToMathMd = ({ latex }) => { return `$$${latex || ''}$$`; }; +export function sanitizePastedHTML(html) { + if (!html) return ''; + // The code on lines 55 to 66 was generated with the help of an LLM with the prompt + // "Create a function that sanitizes HTML pasted from Microsoft + // Word by removing Word-specific tags, styles, and classes while preserving other formatting." 
+ let cleaned = html; + cleaned = cleaned.replace(/<!--.*?-->/gis, ''); + cleaned = cleaned.replace(/<\/?(w|m|o|v):[^>]*>/gis, ''); + const parser = new DOMParser(); + const doc = parser.parseFromString(cleaned, 'text/html'); + doc.querySelectorAll('*').forEach(el => { + if (el.hasAttribute('style')) { + const style = el.getAttribute('style') || ''; + const filtered = style + .split(';') + .map(s => s.trim()) + .filter(s => s && !s.toLowerCase().startsWith('mso-')) + .join('; '); + if (filtered) { + el.setAttribute('style', filtered); + } else { + el.removeAttribute('style'); + } + } + if (el.hasAttribute('class')) { + const cls = el + .getAttribute('class') + .split(/\s+/) + .filter(c => c && !/^Mso/i.test(c)) + .join(' '); + if (cls) { + el.setAttribute('class', cls); + } else { + el.removeAttribute('class'); + } + } + }); + const strikeElements = doc.querySelectorAll('s, strike, del'); + strikeElements.forEach(el => { + const nestedLists = el.querySelectorAll('ul, ol'); + if (nestedLists.length > 0) { + nestedLists.forEach(list => { + el.parentNode.insertBefore(list, el.nextSibling); + }); + } + }); + const lists = doc.querySelectorAll('ul, ol'); + lists.forEach(list => { + const items = list.querySelectorAll(':scope > li'); + items.forEach(item => { + const nestedLists = Array.from(item.children).filter( + child => child.tagName === 'UL' || child.tagName === 'OL', + ); + nestedLists.forEach(nestedList => { + item.appendChild(nestedList); + }); + }); + }); + return doc.body.innerHTML; +} + /** * Pre-processes a raw Markdown string to convert custom syntax into HTML tags * that Tiptap's extensions can understand. This is our custom "loader". 
@@ -86,7 +147,9 @@ export function preprocessMarkdown(markdown) { if (!params) return match; return ``; }); + let html = marked(processedMarkdown); + + html = sanitizePastedHTML(html); - // Use marked.js to parse the rest of the markdown - return marked(processedMarkdown); + return html; } From aa3e54d570e68fea93208d6e2ca4871ff6223980 Mon Sep 17 00:00:00 2001 From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:52:54 +0300 Subject: [PATCH 2/3] fix failing test by removing HTML sanitization from markdown preprocessing --- .../shared/views/TipTapEditor/TipTapEditor/utils/markdown.js | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js index 8d4ec4aaab..20eb9ea2ed 100644 --- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js +++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js @@ -147,9 +147,6 @@ export function preprocessMarkdown(markdown) { if (!params) return match; return ``; }); - let html = marked(processedMarkdown); - html = sanitizePastedHTML(html); - - return html; + return marked(processedMarkdown); } From 8504d8516dd5dc436b5d21fc05e18e391718d3ea Mon Sep 17 00:00:00 2001 From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com> Date: Fri, 5 Dec 2025 20:12:39 +0300 Subject: [PATCH 3/3] remove redundant handlePasteNoFormat calls in paste handler --- .../TipTapEditor/TipTapEditor/composables/useToolbarActions.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js index 
573381fa8d..58f10bbd94 100644 --- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js +++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js @@ -159,9 +159,7 @@ export function useToolbarActions(emit) { return; } } - return handlePasteNoFormat(); } - return handlePasteNoFormat(); } catch (err) { editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run(); }