From 409a8a6f848f9934ee9c91409f8741cc7e47eb75 Mon Sep 17 00:00:00 2001
From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com>
Date: Thu, 4 Dec 2025 22:13:01 +0300
Subject: [PATCH 1/3] improve HTML paste handling with sanitization
---
.../composables/useToolbarActions.js | 62 ++++++++++-------
.../TipTapEditor/utils/markdown.js | 67 ++++++++++++++++++-
2 files changed, 104 insertions(+), 25 deletions(-)
diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
index 2745491656..573381fa8d 100644
--- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
+++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
@@ -1,5 +1,6 @@
import { computed, inject } from 'vue';
import { getTipTapEditorStrings } from '../TipTapEditorStrings';
+import { sanitizePastedHTML } from '../utils/markdown';
export function useToolbarActions(emit) {
const editor = inject('editor', null);
@@ -135,35 +136,50 @@ export function useToolbarActions(emit) {
};
const handlePaste = async () => {
- if (editor.value) {
- try {
- // Try HTML first
- const clipboardData = await navigator.clipboard.read();
- const htmlType = clipboardData[0].types.find(type => type === 'text/html');
-
- if (htmlType) {
- const htmlBlob = await clipboardData[0].getType('text/html');
- const html = await htmlBlob.text();
- editor.value.chain().focus().insertContent(html).run();
- } else {
- // Fall back to plain text
- handlePasteNoFormat();
+ if (!editor.value) return;
+
+ try {
+ if (navigator.clipboard?.read) {
+ const items = await navigator.clipboard.read();
+
+ for (const item of items) {
+ if (item.types.includes('text/html')) {
+ const htmlBlob = await item.getType('text/html');
+ const html = await htmlBlob.text();
+ const cleaned = sanitizePastedHTML(html);
+
+ editor.value.chain().focus().insertContent(cleaned).run();
+ return;
+ }
+ if (item.types.includes('text/plain')) {
+ const textBlob = await item.getType('text/plain');
+ const text = await textBlob.text();
+
+ editor.value.chain().focus().insertContent(text).run();
+ return;
+ }
}
- } catch (err) {
- editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
+ return handlePasteNoFormat();
}
+ return handlePasteNoFormat();
+ } catch (err) {
+ editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
}
};
const handlePasteNoFormat = async () => {
- if (editor.value) {
- try {
- // Read plain text from clipboard
- const text = await navigator.clipboard.readText();
- editor.value.chain().focus().insertContent(text).run();
- } catch (err) {
- editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
- }
+ if (!editor.value) return;
+
+ try {
+ const text = await navigator.clipboard.readText();
+ if (!text) return;
+
+ // Note: this regex was generated with the help of an LLM; it normalizes CRLF line endings to LF.
+ const normalized = text.replace(/\r\n/g, '\n');
+
+ editor.value.chain().focus().insertContent(normalized).run();
+ } catch (err) {
+ editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
}
};
diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
index 26a1bd5c71..8d4ec4aaab 100644
--- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
+++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
@@ -51,6 +51,67 @@ export const paramsToMathMd = ({ latex }) => {
return `$$${latex || ''}$$`;
};
+export function sanitizePastedHTML(html) {
+ if (!html) return '';
+ // The code on lines 55 to 66 was generated with the help of an LLM, using the prompt:
+ // "Create a function that sanitizes HTML pasted from Microsoft
+ // Word by removing Word-specific tags, styles, and classes while preserving other formatting."
+ let cleaned = html;
+ cleaned = cleaned.replace(/<!--.*?-->/gis, '');
+ cleaned = cleaned.replace(/<\/?(w|m|o|v):[^>]*>/gis, '');
+ const parser = new DOMParser();
+ const doc = parser.parseFromString(cleaned, 'text/html');
+ doc.querySelectorAll('*').forEach(el => {
+ if (el.hasAttribute('style')) {
+ const style = el.getAttribute('style') || '';
+ const filtered = style
+ .split(';')
+ .map(s => s.trim())
+ .filter(s => s && !s.toLowerCase().startsWith('mso-'))
+ .join('; ');
+ if (filtered) {
+ el.setAttribute('style', filtered);
+ } else {
+ el.removeAttribute('style');
+ }
+ }
+ if (el.hasAttribute('class')) {
+ const cls = el
+ .getAttribute('class')
+ .split(/\s+/)
+ .filter(c => c && !/^Mso/i.test(c))
+ .join(' ');
+ if (cls) {
+ el.setAttribute('class', cls);
+ } else {
+ el.removeAttribute('class');
+ }
+ }
+ });
+ const strikeElements = doc.querySelectorAll('s, strike, del');
+ strikeElements.forEach(el => {
+ const nestedLists = el.querySelectorAll('ul, ol');
+ if (nestedLists.length > 0) {
+ nestedLists.forEach(list => {
+ el.parentNode.insertBefore(list, el.nextSibling);
+ });
+ }
+ });
+ const lists = doc.querySelectorAll('ul, ol');
+ lists.forEach(list => {
+ const items = list.querySelectorAll(':scope > li');
+ items.forEach(item => {
+ const nestedLists = Array.from(item.children).filter(
+ child => child.tagName === 'UL' || child.tagName === 'OL',
+ );
+ nestedLists.forEach(nestedList => {
+ item.appendChild(nestedList);
+ });
+ });
+ });
+ return doc.body.innerHTML;
+}
+
/**
* Pre-processes a raw Markdown string to convert custom syntax into HTML tags
* that Tiptap's extensions can understand. This is our custom "loader".
@@ -86,7 +147,9 @@ export function preprocessMarkdown(markdown) {
if (!params) return match;
return ``;
});
+ let html = marked(processedMarkdown);
+
+ html = sanitizePastedHTML(html);
- // Use marked.js to parse the rest of the markdown
- return marked(processedMarkdown);
+ return html;
}
From aa3e54d570e68fea93208d6e2ca4871ff6223980 Mon Sep 17 00:00:00 2001
From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com>
Date: Thu, 4 Dec 2025 22:52:54 +0300
Subject: [PATCH 2/3] fix failing test by removing HTML sanitization from
markdown preprocessing
---
.../shared/views/TipTapEditor/TipTapEditor/utils/markdown.js | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
index 8d4ec4aaab..20eb9ea2ed 100644
--- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
+++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
@@ -147,9 +147,6 @@ export function preprocessMarkdown(markdown) {
if (!params) return match;
return ``;
});
- let html = marked(processedMarkdown);
- html = sanitizePastedHTML(html);
-
- return html;
+ return marked(processedMarkdown);
}
From 8504d8516dd5dc436b5d21fc05e18e391718d3ea Mon Sep 17 00:00:00 2001
From: Allan Otodi Opeto <103313919+AllanOXDi@users.noreply.github.com>
Date: Fri, 5 Dec 2025 20:12:39 +0300
Subject: [PATCH 3/3] remove redundant handlePasteNoFormat calls in paste
handler
---
.../TipTapEditor/TipTapEditor/composables/useToolbarActions.js | 2 --
1 file changed, 2 deletions(-)
diff --git a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
index 573381fa8d..58f10bbd94 100644
--- a/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
+++ b/contentcuration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
@@ -159,9 +159,7 @@ export function useToolbarActions(emit) {
return;
}
}
- return handlePasteNoFormat();
}
- return handlePasteNoFormat();
} catch (err) {
editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
}