Desktop: Fixes #11597: OneNote Importer should only use text on fallback title (#11598)

pull/11602/head^2
pedr 2025-01-09 12:22:12 -03:00 committed by GitHub
parent 72575e3c6f
commit a81af0711c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 179 additions and 13 deletions

View File

@ -176,4 +176,15 @@ describe('InteropService_Importer_OneNote', () => {
BaseModel.setIdGenerator(originalIdGenerator);
});
// Imports the `remove_hyperlink_on_title.zip` fixture and snapshots every
// resulting note body, using the note title as the snapshot name.
skipIfNotCI('should remove hyperlink from title', async () => {
	// Replace the id generator with a simple counter for the duration of the
	// test (presumably so generated ids, and therefore snapshots, are stable).
	let idx = 0;
	const originalIdGenerator = BaseModel.setIdGenerator(() => String(idx++));
	try {
		const notes = await importNote(`${supportDir}/onenote/remove_hyperlink_on_title.zip`);
		for (const note of notes) {
			expect(note.body).toMatchSnapshot(note.title);
		}
	} finally {
		// Always restore the original generator, even when the import or a
		// snapshot assertion throws, so the counter does not leak into other tests.
		BaseModel.setIdGenerator(originalIdGenerator);
	}
});
});

View File

@ -766,3 +766,122 @@ exports[`InteropService_Importer_OneNote should import a simple OneNote notebook
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: wikipedia link 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>wikipedia link</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">&nbsp;</span></div>
</div><div class="container-outline"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">Sunday, January 05, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">10:15 PM</span></div>
</div></div><div class="container-outline" style="left: 48px; position: absolute; top: 115px; width: 624px;"><div class="outline-element" style="margin-left: 0px;"><p style="font-family: Calibri; font-size: 11pt;"><a href="https://zh.wikipedia.org/zh-hans/%E9%A3%8E%E6%99%AF" style="">wikipedia link</a></p></div>
</div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 (Web view) 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>风景 (Web view)</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">&nbsp;</span></div>
</div><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(128,128,128); font-family: Calibri; font-size: 10pt;">Sunday, January 5, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(128,128,128); font-family: Calibri; font-size: 10pt;">10:13 PM</span></div>
</div></div><div class="container-outline" style="left: 48px; position: absolute; top: 115px; width: 624px;"><div class="outline-element" style="margin-left: 0px;"><p style="font-family: Calibri; font-size: 11pt;"><a href="onenote:#风景&section-id={75256889-9e75-4ec2-82ed-fc799557e1b9}&page-id={d099b6f3-7f5a-4c08-aed7-e8d42c59523f}&end" style="">风景</a><span style="font-family: Calibri; font-size: 11pt;"> (</span><a href="https://onedrive.live.com/edit.aspx?resid=193EE54E3252492D!s9b62db4219f740709f444bc0129de4e9&migratedtospo=true&wd=target%28Quick%20Notes.one%7C75256889-9e75-4ec2-82ed-fc799557e1b9%2F%E9%A3%8E%E6%99%AF%7Cd099b6f3-7f5a-4c08-aed7-e8d42c59523f%2F%29&wdorigin=703&wdpreservelink=1" style="">Web view</a><span style="font-family: Calibri; font-size: 11pt;">)</span></p></div>
</div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>风景</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">风景</span></div>
</div><div class="container-outline"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">Sunday, January 05, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">10:14 PM</span></div>
</div></div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;

View File

@ -84,7 +84,7 @@ impl Renderer {
let section_path = renderer.render(section, notebook_dir)?;
log!("section_path: {:?}", section_path);
let path_from_base_dir = unsafe { remove_prefix(section_path.as_str(), base_dir.as_str()) }
let path_from_base_dir = unsafe { remove_prefix(section_path, base_dir.as_str()) }
.unwrap()
.as_string()
.unwrap();

View File

@ -35,7 +35,7 @@ impl<'a> Renderer<'a> {
}
pub(crate) fn render_page(&mut self, page: &Page) -> Result<String> {
let title_text = page.title_text().unwrap_or("Untitled Page");
let title_text = page.title_text().unwrap_or("Untitled Page".to_string());
let mut content = String::new();
@ -70,7 +70,7 @@ impl<'a> Renderer<'a> {
content.push_str(&page_content);
crate::templates::page::render(title_text, &content, &self.global_styles)
crate::templates::page::render(&title_text, &content, &self.global_styles)
}
pub(crate) fn gen_class(&mut self, prefix: &str) -> String {

View File

@ -74,7 +74,7 @@ impl<'a> Renderer<'a> {
// all the styles to be shifted by minus one.
// A better solution would be to look if there isn't anything wrong with the parser,
// but I haven't found what could be causing this yet.
if text.starts_with("\u{000B}") && !indices.is_empty(){
if text.starts_with("\u{000B}") && !indices.is_empty() {
indices.remove(0);
styles.pop();
}

View File

@ -32,10 +32,13 @@ pub(crate) fn parse(object: &Object) -> Result<Data> {
let entity_guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object)?
.ok_or_else(|| ErrorKind::MalformedOneNoteFileData("page metadata has no guid".into()))?;
let cached_title =
simple::parse_string(PropertyType::CachedTitleString, object)?.ok_or_else(|| {
ErrorKind::MalformedOneNoteFileData("page metadata has no cached title".into())
})?;
// The page might not have a title but we can use the first Section outline from the body as the fallback later
let cached_title = simple::parse_string(PropertyType::CachedTitleString, object)?
.ok_or_else(|| {
let guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object);
return guid.map(|g| g.unwrap().to_string());
})
.unwrap_or("Untitled Page".to_string());
let schema_revision_in_order_to_read =
simple::parse_u32(PropertyType::SchemaRevisionInOrderToRead, object)?;
let schema_revision_in_order_to_write =

View File

@ -62,16 +62,23 @@ impl Page {
/// The page's title text.
///
/// This is calculated using a heuristic similar to the one OneNote uses.
pub fn title_text(&self) -> Option<&str> {
pub fn title_text(&self) -> Option<String> {
self.title
.as_ref()
.and_then(|title| title.contents.first())
.and_then(Self::outline_text)
.and_then(|t| Some(Self::remove_hyperlink(t.to_owned())))
.or_else(|| {
self.contents
.iter()
.filter_map(|page_content| page_content.outline())
.filter_map(Self::outline_text)
.filter_map(|t| {
let v = Self::outline_text(t);
if v.is_none() {
return None;
}
return Some(Self::remove_hyperlink(v.unwrap().to_owned()));
})
.next()
})
}
@ -85,6 +92,33 @@ impl Page {
.and_then(|content| content.rich_text())
.and_then(|text| Some(&*text.text).filter(|s| !s.is_empty()))
}
/// Strips embedded hyperlink markers from a page title, keeping only the
/// surrounding text.
///
/// A marker has the form `\u{fddf}HYPERLINK "<target>"`. Every occurrence is
/// removed, from the marker character up to and including the closing double
/// quote of the target; the text after the closing quote is kept.
///
/// If a marker has no closing quote (a broken link), everything from that
/// marker onwards is discarded and only the text before it is returned.
///
/// Titles without any marker are returned unchanged.
fn remove_hyperlink(mut title: String) -> String {
    const HYPERLINK_MARKER: &str = "\u{fddf}HYPERLINK \"";

    // Edit the owned string in place. All offsets are computed against the
    // same buffer they are applied to, so they always fall on valid character
    // boundaries, even after earlier markers have already been removed.
    while let Some(marker_start) = title.find(HYPERLINK_MARKER) {
        let target_start = marker_start + HYPERLINK_MARKER.len();

        match title[target_start..].find('"') {
            // Drop the marker, the link target and its closing quote.
            Some(quote_offset) => {
                title.replace_range(marker_start..=target_start + quote_offset, "");
            }
            // Sometimes links are broken; in these cases only keep what
            // comes before the marker.
            None => title.truncate(marker_start),
        }
    }

    title
}
}
/// A page title.

View File

@ -64,7 +64,7 @@ impl Renderer {
let _ = unsafe { write_file(&page_path, page_html.as_bytes()) };
let page_path_without_basedir =
unsafe { remove_prefix(page_path.as_str(), output_dir.as_str()) }
unsafe { remove_prefix(page_path, output_dir.as_str()) }
.unwrap()
.as_string()
.unwrap();
@ -72,7 +72,6 @@ impl Renderer {
}
}
log!("Section finished rendering: {:?}", section.display_name());
let toc_html = templates::section::render(section.display_name(), toc)?;
let toc_file = unsafe {
join_path(

View File

@ -82,7 +82,7 @@ extern "C" {
#[wasm_bindgen(js_name = removePrefix, catch)]
pub unsafe fn remove_prefix(
base_path: &str,
base_path: String,
prefix: &str,
) -> std::result::Result<JsValue, JsValue>;