Desktop: Fixes #10274: old.reddit pages are not saved correctly as HTML by the Web Clipper

pull/10383/head
Laurent Cozic 2024-04-27 10:19:35 +01:00
parent a5f118bc26
commit b1a669de01
3 changed files with 21 additions and 1 deletions

View File

@ -0,0 +1 @@
<div><span class="jop-noMdConv">This is a comment we would like to keep</div></form>

View File

@ -0,0 +1 @@
<form><span>This is a comment we would like to keep</span></form>

View File

@ -223,7 +223,7 @@ class HtmlUtils {
// to disable them. SVG graphics are still supported via the IMG tag.
const disallowedTags = [
'script', 'iframe', 'frameset', 'frame', 'object', 'base',
'embed', 'link', 'meta', 'noscript', 'button', 'form',
'embed', 'link', 'meta', 'noscript', 'button',
'input', 'select', 'textarea', 'option', 'optgroup',
'svg',
@ -233,6 +233,14 @@ class HtmlUtils {
'map', 'area',
];
// Certain tags should not be rendered but, unlike the disallowed tags, we want to keep
// their content. For example, the FORM tag may sometimes wrap relevant content, so we
// want to keep that content but not the FORM tag itself. In that case we simply replace
// the tag with a DIV tag.
const replaceWithDivTags = [
'form',
];
const parser = new htmlparser2.Parser({
onopentag: (name: string, attrs: Record<string, string>) => {
@ -249,6 +257,11 @@ class HtmlUtils {
if (disallowedTagDepth) return;
if (replaceWithDivTags.includes(currentTag())) {
output.push('<div>');
return;
}
attrs = { ...attrs };
// Remove all the attributes that start with "on", which
@ -342,6 +355,11 @@ class HtmlUtils {
if (disallowedTagDepth) return;
if (replaceWithDivTags.includes(currentTag())) {
output.push('</div>');
return;
}
if (isSelfClosingTag(name)) return;
output.push(`</${name}>`);
},