2017-11-03 00:09:34 +00:00
const stringPadding = require ( 'string-padding' ) ;
2017-07-17 18:59:40 +00:00
// Marker tokens inserted in the intermediate Markdown token array. They are
// resolved to actual newlines / spaces by processMdArrayNewLines().
const BLOCK_OPEN = "[[BLOCK_OPEN]]";
const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";
2017-06-24 23:19:11 +00:00
/**
 * Resolves the marker tokens (BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED,
 * SPACE) of a Markdown token array and returns the resulting Markdown string.
 *
 * The input array is not modified.
 *
 * @param {Array} md - Tokens: marker strings, text strings, or objects that
 *   render themselves through toString().
 * @returns {string} The assembled text, or '' when it contains only white space.
 */
function processMdArrayNewLines(md) {
  // Work on a copy: the original implementation shifted/popped the caller's array.
  let tokens = md.slice();

  // Leading block openings and trailing block closings would only produce
  // empty lines at the edges of the output.
  while (tokens.length && tokens[0] === BLOCK_OPEN) tokens.shift();
  while (tokens.length && tokens[tokens.length - 1] === BLOCK_CLOSE) tokens.pop();

  // Pass 1: collapse runs of identical block markers into a single marker.
  let temp = [];
  let last = '';
  for (const v of tokens) {
    const repeatedBlockMarker = last === v && isNewLineBlock(v);
    if (!repeatedBlockMarker) temp.push(v);
    last = v;
  }
  tokens = temp;

  // Pass 2: a block closing immediately followed by a block opening becomes
  // one merged newline.
  temp = [];
  last = '';
  for (const v of tokens) {
    if (last === BLOCK_CLOSE && v === BLOCK_OPEN) {
      temp.pop();
      temp.push(NEWLINE_MERGED);
    } else {
      temp.push(v);
    }
    last = v;
  }
  tokens = temp;

  // Drops every token that directly follows a NEWLINE (in the list being
  // scanned) and is either NEWLINE_MERGED or the given block marker.
  const dropAfterNewline = (list, blockMarker) => {
    const kept = [];
    let prev = '';
    for (const v of list) {
      const redundant = prev === NEWLINE && (v === NEWLINE_MERGED || v === blockMarker);
      if (!redundant) kept.push(v);
      prev = v;
    }
    return kept;
  };

  // Passes 3 and 4 must stay separate: the second one scans the output of the first.
  tokens = dropAfterNewline(tokens, BLOCK_CLOSE);
  tokens = dropAfterNewline(tokens, BLOCK_OPEN);

  if (tokens.length > 2) {
    const beforeLast = tokens[tokens.length - 2];
    if (beforeLast === NEWLINE_MERGED && tokens[tokens.length - 1] === NEWLINE) tokens.pop();
  }

  // Final pass: render the markers.
  let output = '';
  let previous = '';
  let start = true;
  for (const v of tokens) {
    let add = '';
    if (v === BLOCK_CLOSE || v === BLOCK_OPEN || v === NEWLINE || v === NEWLINE_MERGED) {
      add = "\n";
    } else if (v === SPACE) {
      // Skip spaces at the very start, after a newline, or after another space.
      // `previous` holds the rendered text, so it must be compared with " "
      // (comparing it with the SPACE marker could never match).
      if (previous === " " || previous === "\n" || start) continue;
      add = " ";
    } else {
      add = String(v);
    }
    start = false;
    output += add;
    previous = add;
  }

  if (!output.trim().length) return '';
  return output;
}
/**
 * Tells whether `c` is one of the ASCII white space characters: line feed,
 * carriage return, vertical tab, form feed, tab or space. The non-breaking
 * space is not in the list.
 */
function isWhiteSpace(c) {
  switch (c) {
    case '\n':
    case '\r':
    case '\v':
    case '\f':
    case '\t':
    case ' ':
      return true;
    default:
      return false;
  }
}
// Like QString::simplified(), except that it preserves non-breaking spaces
// (which Evernote uses for indentation, etc.).
//
// Every run of white space characters is reduced to its first character, then
// white space is removed from both ends of the string.
function simplifyString(s) {
  let collapsed = '';
  let inWhiteRun = false;
  for (let pos = 0; pos < s.length; pos++) {
    const ch = s[pos];
    const white = isWhiteSpace(ch);
    // Only the first character of a white space run is kept
    if (!(white && inWhiteRun)) collapsed += ch;
    inWhiteRun = white;
  }

  let first = 0;
  let end = collapsed.length;
  while (first < end && isWhiteSpace(collapsed[first])) first++;
  while (end > first && isWhiteSpace(collapsed[end - 1])) end--;
  return collapsed.substring(first, end);
}
/**
 * Appends a text node to the token array.
 *
 * In code mode the text is pushed as is, prefixed with a tab. Otherwise its
 * white space is collapsed and a leading / trailing space is represented by a
 * SPACE token. In quote mode a "> " token is pushed first and "> " is added
 * after every line break of the text.
 *
 * @param {Array} lines - Token array to append to (modified in place).
 * @param {Object} state - Parser state; `inCode` and `inQuote` are read.
 * @param {string} text - Raw text of the node.
 * @returns {Array} The `lines` array.
 */
function collapseWhiteSpaceAndAppend(lines, state, text) {
  if (state.inCode) {
    lines.push("\t" + text);
    return lines;
  }

  // Remove all \n and \r from the left and right of the text
  text = text.replace(/^[\n\r]+/, '').replace(/[\n\r]+$/, '');

  // Collapse all white spaces to just one. If there are spaces to the left and right of the string
  // also collapse them to just one space.
  const spaceLeft = text.startsWith(' ');
  const spaceRight = text.endsWith(' ');
  text = simplifyString(text);

  if (!spaceLeft && !spaceRight && text === "") return lines;

  if (state.inQuote) {
    // Add a ">" at the beginning of the block then at the beginning of each line. So it turns this:
    // "my quote\nsecond line" into this => "> my quote\n> second line"
    lines.push('> ');
    // Every kind of line break gets the prefix. The previous code looked for
    // '\r' in the `lines` array instead of the text, so lines ending with a
    // carriage return were never prefixed.
    text = text.replace(/\r\n|\n\r|\r|\n/g, '$&> ');
  }

  if (spaceLeft) lines.push(SPACE);
  lines.push(text);
  if (spaceRight) lines.push(SPACE);

  return lines;
}
// MIME types for which a resource is rendered as a Markdown image.
const imageMimeTypes = [
  "image/cgm",
  "image/fits",
  "image/g3fax",
  "image/gif",
  "image/ief",
  "image/jp2",
  "image/jpeg",
  "image/jpm",
  "image/jpx",
  "image/naplps",
  "image/png",
  "image/prs.btif",
  "image/prs.pti",
  "image/t38",
  "image/tiff",
  "image/tiff-fx",
  "image/vnd.adobe.photoshop",
  "image/vnd.cns.inf2",
  "image/vnd.djvu",
  "image/vnd.dwg",
  "image/vnd.dxf",
  "image/vnd.fastbidsheet",
  "image/vnd.fpx",
  "image/vnd.fst",
  "image/vnd.fujixerox.edmics-mmr",
  "image/vnd.fujixerox.edmics-rlc",
  "image/vnd.globalgraphics.pgb",
  "image/vnd.microsoft.icon",
  "image/vnd.mix",
  "image/vnd.ms-modi",
  "image/vnd.net-fpx",
  "image/vnd.sealed.png",
  "image/vnd.sealedmedia.softseal.gif",
  "image/vnd.sealedmedia.softseal.jpg",
  "image/vnd.svf",
  "image/vnd.wap.wbmp",
  "image/vnd.xiff",
];

/** Tells whether the MIME type `m` is part of the imageMimeTypes list. */
function isImageMimeType(m) {
  return imageMimeTypes.includes(m);
}
/**
 * Appends the Markdown tag of a resource to the token array: an image tag
 * (`![alt](:/id)`) when the resource MIME type is an image type, a link tag
 * (`[alt](:/id)`) otherwise.
 *
 * @param {Array} lines - Token array to append to (modified in place).
 * @param {Object} resource - Resource; `mime`, `id` and `alt` are read.
 * @param {string} [alt=""] - Alternative text; when empty, `resource.alt` is used.
 * @returns {Array} The `lines` array.
 */
function addResourceTag(lines, resource, alt = "") {
  // TODO: refactor to use Resource.markdownTag

  const tagAlt = (alt === "" ? resource.alt : alt) || '';
  const opening = isImageMimeType(resource.mime) ? "![" : "[";

  lines.push(opening);
  lines.push(tagAlt);
  lines.push("](:/" + resource.id + ")");

  return lines;
}
/** Tells whether the (lowercase) tag name `n` opens a block: div, p, dl, dd, dt or center. */
function isBlockTag(n) {
  return n === "div" || n === "p" || n === "dl" || n === "dd" || n === 'dt' || n === "center";
}
/** Tells whether the (lowercase) tag name `n` is rendered as strong text: strong, b or big. */
function isStrongTag(n) {
  return n === "strong" || n === "b" || n === 'big';
}
/** Tells whether the (lowercase) tag name `n` is a strike-through tag: strike, s or del. */
function isStrikeTag(n) {
  return n === "strike" || n === "s" || n === 'del';
}
/** Tells whether the tag name `n` is rendered as emphasised text: em, i or u. */
function isEmTag(n) {
  switch (n) {
    case "em":
    case "i":
    case "u":
      return true;
    default:
      return false;
  }
}
/** Tells whether the tag name `n` is the anchor tag. */
function isAnchor(n) {
  const anchorTag = "a";
  return n === anchorTag;
}
/**
 * Tells whether the closing tag `n` (lowercase) produces no output.
 *
 * "s" is intentionally NOT part of the list: the opening <s> tag pushes a "("
 * (see isStrikeTag), and since the close-tag handler tests isIgnoredEndTag()
 * before isStrikeTag(), listing it here meant the matching ")" was never added.
 */
function isIgnoredEndTag(n) {
  const ignored = [
    "en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 'tbody',
    'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub',
  ];
  return ignored.includes(n);
}
/** Tells whether the tag name `n` is a list container: ol or ul. */
function isListTag(n) {
  if (n === "ol") return true;
  if (n === "ul") return true;
  return false;
}
// Elements that don't require any special treatment besides adding a newline character
// when they are closed.
function isNewLineOnlyEndTag(n) {
  const tags = ["div", "p", "li", "h1", "h2", "h3", "h4", "h5", 'h6', "dl", "dd", 'dt', "center"];
  return tags.includes(n);
}
/** Tells whether the (lowercase) tag name `n` delimits a code block: pre or code. */
function isCodeTag(n) {
  // NOTE: This handles "code" tags that were copied and pasted from a browser to Evernote. Evernote also has its own code block, which
  // of course is way more complicated and currently not fully supported (the code will be imported and indented properly, but it won't
  // have the extra Markdown indentation that identifies the block as code). For reference this is an example of Evernote-style code block:
  //
  // <div style="-en-codeblock: true; box-sizing: border-box; padding: 8px; font-family: Monaco, Menlo, Consolas, "Courier New",
  // monospace; font-size: 12px; color: rgb(51, 51, 51); border-top-left-radius: 4px; border-top-right-radius: 4px; border-bottom-right-radius:
  // 4px; border-bottom-left-radius: 4px; background-color: rgb(251, 250, 248); border: 1px solid rgba(0, 0, 0, 0.14902); background-position:
  // initial initial; background-repeat: initial initial;"><div>function justTesting() {</div><div> someCodeBlock();</div>
  // <div> return true;</div><div>}</div></div>
  //
  // Which in normal HTML would be:
  //
  // <code>
  // function justTesting() {
  //   someCodeBlock();
  //   return true;
  // }
  // </code>

  return n === "pre" || n === "code";
}
/** Tells whether the token `s` is one of the block markers (BLOCK_OPEN or BLOCK_CLOSE). */
function isNewLineBlock(s) {
  return s === BLOCK_OPEN || s === BLOCK_CLOSE;
}
/**
 * Returns the first element of `xmlNode`, or an empty string when `xmlNode`
 * is falsy or has no (or a zero) length.
 */
function xmlNodeText(xmlNode) {
  const hasContent = Boolean(xmlNode) && Boolean(xmlNode.length);
  return hasContent ? xmlNode[0] : '';
}
2017-12-06 19:29:58 +00:00
/**
 * Returns a new object holding the own attributes of `node` with their names
 * converted to lowercase. An empty object is returned when the node has no
 * `attributes` property.
 */
function attributeToLowerCase(node) {
  const source = node.attributes;
  if (!source) return {};

  const lowered = {};
  Object.keys(source).forEach((name) => {
    lowered[name.toLowerCase()] = source[name];
  });
  return lowered;
}
2017-06-24 23:19:11 +00:00
/**
 * Parses the XML content of a note with a non-strict SAX parser and converts
 * it to a tree of sections holding Markdown tokens.
 *
 * @param {Object} stream - Readable stream of the note content; piped into the SAX stream.
 * @param {Array} resources - Resources of the note. The array itself is not modified, but a
 *   resource without an ID may get its `id` set from the hash of an <en-media> tag.
 * @returns {Promise<Object>} Resolves, when the parser ends, with `content` (the section that
 *   is current at that point, normally the root "text" section) and `resources` (the resources
 *   that were not matched by an <en-media> tag). Rejects with a string when an <li> tag is
 *   found outside of a list.
 */
function enexXmlToMdArray(stream, resources) {
  const remainingResources = resources.slice();

  const removeRemainingResource = (id) => {
    // Iterate backwards so that removing an element does not make the loop
    // skip the element that follows it.
    for (let i = remainingResources.length - 1; i >= 0; i--) {
      if (remainingResources[i].id === id) remainingResources.splice(i, 1);
    }
  };

  return new Promise((resolve, reject) => {
    const state = {
      inCode: false,
      inQuote: false,
      lists: [],
      anchorAttributes: [],
    };

    const options = {};
    const strict = false;
    const saxStream = require('sax').createStream(strict, options);

    // The section that currently receives the tokens. Tables, rows and cells
    // create nested sections that point back to their parent.
    let section = {
      type: 'text',
      lines: [],
      parent: null,
    };

    saxStream.on('error', function(e) {
      // Parsing errors are only logged; the promise is deliberately not rejected.
      console.warn(e);
    });

    saxStream.on('text', function(text) {
      // Text directly inside a table or a row (i.e. outside of a cell) is dropped
      if (['table', 'tr', 'tbody'].indexOf(section.type) >= 0) return;
      section.lines = collapseWhiteSpaceAndAppend(section.lines, state, text);
    });

    saxStream.on('opentag', function(node) {
      const nodeAttributes = attributeToLowerCase(node);
      const n = node.name.toLowerCase();

      if (n === 'en-note') {
        // Start of note
      } else if (isBlockTag(n)) {
        section.lines.push(BLOCK_OPEN);
      } else if (n === 'table') {
        const newSection = {
          type: 'table',
          lines: [],
          parent: section,
          toString: function() {
            let output = [];
            output.push(BLOCK_OPEN);
            for (let i = 0; i < this.lines.length; i++) {
              output = output.concat(this.lines[i].toMdLines());
            }
            output.push(BLOCK_CLOSE);
            return processMdArrayNewLines(output);
          },
        };
        section.lines.push(newSection);
        section = newSection;
      } else if (n === 'tbody' || n === 'thead') {
        // Ignore it
      } else if (n === 'tr') {
        if (section.type !== 'table') {
          console.warn('Found a <tr> tag outside of a table');
          return;
        }

        const newSection = {
          type: 'tr',
          lines: [],
          parent: section,
          isHeader: false,
          // Normally tables are rendered properly as markdown, but for table within table within table... we cannot
          // handle this in Markdown so simply render it as one cell per line.
          toMdLines: function() {
            const output = [];
            output.push(BLOCK_OPEN);
            for (let i = 0; i < this.lines.length; i++) {
              output.push(this.lines[i].toString());
            }
            output.push(BLOCK_CLOSE);
            return output;
          },
        };
        section.lines.push(newSection);
        section = newSection;
      } else if (n === 'td' || n === 'th') {
        if (section.type !== 'tr') {
          console.warn('Found a <td> tag outside of a <tr>');
          return;
        }

        if (n === 'th') section.isHeader = true;

        const newSection = {
          type: 'td',
          lines: [],
          parent: section,
          toString: function() {
            return processMdArrayNewLines(this.lines);
          },
        };
        section.lines.push(newSection);
        section = newSection;
      } else if (isListTag(n)) {
        section.lines.push(BLOCK_OPEN);
        state.lists.push({ tag: n, counter: 1 });
      } else if (n === 'li') {
        section.lines.push(BLOCK_OPEN);
        if (!state.lists.length) {
          reject("Found <li> tag without being inside a list"); // TODO: could be a warning, but nothing to handle warnings at the moment
          return;
        }

        const container = state.lists[state.lists.length - 1];
        if (container.tag === "ul") {
          section.lines.push("- ");
        } else {
          section.lines.push(container.counter + '. ');
          container.counter++;
        }
      } else if (isStrongTag(n)) {
        section.lines.push("**");
      } else if (isStrikeTag(n)) {
        section.lines.push('(');
      } else if (n === 'samp') {
        section.lines.push('`');
      } else if (n === 'q') {
        section.lines.push('"');
      } else if (n === 'img') {
        // TODO: TEST IMAGE
        if (nodeAttributes.src) { // Many (most?) img tags don't have any source associated, especially when they were imported from HTML
          let s = '![';
          if (nodeAttributes.alt) s += nodeAttributes.alt;
          s += '](' + nodeAttributes.src + ')';
          section.lines.push(s);
        }
      } else if (isAnchor(n)) {
        state.anchorAttributes.push(nodeAttributes);
        section.lines.push('[');
      } else if (isEmTag(n)) {
        section.lines.push("*");
      } else if (n === "en-todo") {
        const x = nodeAttributes && nodeAttributes.checked && nodeAttributes.checked.toLowerCase() === 'true' ? 'X' : ' ';
        section.lines.push('- [' + x + '] ');
      } else if (n === "hr") {
        // Needs to be surrounded by new lines so that it's properly rendered as a line when converting to HTML
        section.lines.push(NEWLINE);
        section.lines.push('----------------------------------------');
        section.lines.push(NEWLINE);
        section.lines.push(NEWLINE);
      } else if (/^h[1-6]$/.test(n)) {
        // h1 => "# ", h2 => "## ", ... h6 => "###### "
        section.lines.push(BLOCK_OPEN);
        section.lines.push('#'.repeat(Number(n[1])) + ' ');
      } else if (n === 'blockquote') {
        section.lines.push(BLOCK_OPEN);
        state.inQuote = true;
      } else if (isCodeTag(n, nodeAttributes)) {
        section.lines.push(BLOCK_OPEN);
        state.inCode = true;
      } else if (n === "br") {
        section.lines.push(NEWLINE);
      } else if (n === "en-media") {
        const hash = nodeAttributes.hash;

        let resource = null;
        for (let i = 0; i < resources.length; i++) {
          const r = resources[i];
          // Loose equality is kept from the original implementation
          if (r.id == hash) {
            resource = r;
            removeRemainingResource(r.id);
            break;
          }
        }

        if (!resource) {
          // This is a bit of a hack. Notes sometime have resources attached to it, but those <resource> tags don't contain
          // an "objID" tag, making it impossible to reference the resource. However, in this case the content of the note
          // will contain a corresponding <en-media/> tag, which has the ID in the "hash" attribute. All this information
          // has been collected above so we now set the resource ID to the hash attribute of the en-media tags. Here's an
          // example of note that shows this problem:

          // <?xml version="1.0" encoding="UTF-8"?>
          // <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
          // <en-export export-date="20161221T203133Z" application="Evernote/Windows" version="6.x">
          //   <note>
          //     <title>Commande</title>
          //     <content>
          //       <![CDATA[
          //         <?xml version="1.0" encoding="UTF-8"?>
          //         <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
          //         <en-note>
          //           <en-media alt="your QR code" hash="216a16a1bbe007fba4ccf60b118b4ccc" type="image/png"></en-media>
          //         </en-note>
          //       ]]>
          //     </content>
          //     <created>20160921T203424Z</created>
          //     <updated>20160921T203438Z</updated>
          //     <note-attributes>
          //       <reminder-order>20160902T140445Z</reminder-order>
          //       <reminder-done-time>20160924T101120Z</reminder-done-time>
          //     </note-attributes>
          //     <resource>
          //       <data encoding="base64">........</data>
          //       <mime>image/png</mime>
          //       <width>150</width>
          //       <height>150</height>
          //     </resource>
          //   </note>
          // </en-export>

          let found = false;
          for (let i = 0; i < remainingResources.length; i++) {
            const r = remainingResources[i];
            if (!r.id) {
              r.id = hash;
              remainingResources[i] = r;
              found = true;
              break;
            }
          }

          if (!found) {
            console.warn('Hash with no associated resource: ' + hash);
          }
        } else {
          // If the resource does not appear among the note's resources, it
          // means it's an attachment. It will be appended along with the
          // other remaining resources at the bottom of the markdown text.
          if (resource.id) {
            section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
          }
        }
      } else if (n === "span" || n === "font" || n === 'sup' || n === 'cite' || n === 'abbr' || n === 'small' || n === 'tt' || n === 'sub') {
        // Inline tags that can be ignored in Markdown
      } else {
        console.warn("Unsupported start tag: " + n);
      }
    });

    saxStream.on('closetag', function(n) {
      n = n ? n.toLowerCase() : n;

      if (n === 'en-note') {
        // End of note
      } else if (isNewLineOnlyEndTag(n)) {
        section.lines.push(BLOCK_CLOSE);
      } else if (n === 'td' || n === 'th' || n === 'tr' || n === 'table') {
        // Leave the nested section
        if (section && section.parent) section = section.parent;
      } else if (isIgnoredEndTag(n)) {
        // Skip
      } else if (isListTag(n)) {
        section.lines.push(BLOCK_CLOSE);
        state.lists.pop();
      } else if (isStrongTag(n)) {
        section.lines.push("**");
      } else if (isStrikeTag(n)) {
        section.lines.push(')');
      } else if (n === 'samp') {
        section.lines.push('`');
      } else if (isEmTag(n)) {
        section.lines.push("*");
      } else if (n === 'q') {
        section.lines.push('"');
      } else if (n === 'blockquote') {
        // NOTE(review): a BLOCK_OPEN (not BLOCK_CLOSE) is pushed when the quote is closed.
        // Kept as is because it affects the generated output - confirm whether it is intended.
        section.lines.push(BLOCK_OPEN);
        state.inQuote = false;
      } else if (isCodeTag(n)) {
        state.inCode = false;
        section.lines.push(BLOCK_CLOSE);
      } else if (isAnchor(n)) {
        const attributes = state.anchorAttributes.pop();
        const url = attributes && attributes.href ? attributes.href : '';

        if (section.lines.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible

        // When closing the anchor tag, check if there is any text content. If not
        // put the URL as is (don't wrap it in [](url)). The markdown parser, using
        // GitHub flavour, will turn this URL into a link. This is to generate slightly
        // cleaner markdown.
        const previous = section.lines[section.lines.length - 1];
        if (previous === '[') {
          section.lines.pop();
          section.lines.push(url);
        } else if (!previous || previous === url) {
          section.lines.pop();
          section.lines.pop();
          section.lines.push(url);
        } else {
          section.lines.push('](' + url + ')');
        }
      } else if (n === "en-media") {
        // Skip
      } else {
        console.warn("Unsupported end tag: " + n);
      }
    });

    saxStream.on('attribute', function(attr) {
    });

    saxStream.on('end', function() {
      resolve({
        content: section,
        resources: remainingResources,
      });
    });

    stream.pipe(saxStream);
  });
}
2017-07-17 18:59:40 +00:00
/**
 * Computes the `content` property (single-line Markdown text) of every cell
 * of a table section.
 *
 * @param {Object} table - Section of type "table"; its rows are in `lines`, and
 *   the cells of each row in the row's `lines`.
 * @returns {Object} The same table object, with `content` set on each cell.
 * @throws {Error} If the section is not a table.
 */
function setTableCellContent(table) {
  // The previous test, `!table.type == 'table'`, was always false
  if (table.type !== 'table') throw new Error('Only for tables');

  for (const tr of table.lines) {
    for (const td of tr.lines) {
      // A table cell cannot contain line breaks: each run of newlines becomes one space
      td.content = processMdArrayNewLines(td.lines).replace(/\n+/g, ' ');
    }
  }

  return table;
}
2017-07-17 18:59:40 +00:00
/** Returns the length of the longest line of `cellText` (0 for an empty text). */
function cellWidth(cellText) {
  return cellText
    .split("\n")
    .reduce((widest, line) => (line.length > widest ? line.length : widest), 0);
}
2017-07-13 19:29:10 +00:00
2017-07-17 18:59:40 +00:00
/**
 * Computes the width of each column of a table from the `content` of its cells.
 *
 * @param {Object} table - Table section whose cells already have a `content` string.
 * @returns {number[]} For each column index, the width of its widest cell, capped at 20.
 */
function colWidths(table) {
  // Have to set a max width otherwise it can be extremely long for notes that
  // import entire web pages (eg. Hacker News comment pages)
  const maxWidth = 20;

  const output = [];
  for (const tr of table.lines) {
    tr.lines.forEach((td, tdIndex) => {
      const w = Math.min(cellWidth(td.content), maxWidth);
      if (output.length <= tdIndex) output.push(0);
      if (w > output[tdIndex]) output[tdIndex] = w;
    });
  }
  return output;
}
2017-07-13 19:29:10 +00:00
2017-07-17 18:59:40 +00:00
/**
 * Renders a table section as Markdown table rows.
 *
 * | First Header  | Second Header |
 * | ------------- | ------------- |
 * | Content Cell  | Content Cell  |
 * | Content Cell  | Content Cell  |
 *
 * The first row is followed by the separator row when it is a header row
 * (`isHeader`); otherwise an empty header and the separator are added before it.
 *
 * @param {Object} table - Table section whose cells have a `content` string.
 * @param {number[]} colWidths - Width of each column.
 * @returns {Array} The rendered rows, separated by NEWLINE tokens ([''] for a table without rows).
 */
function drawTable(table, colWidths) {
  // There must be at least 3 dashes separating each header cell.
  // https://gist.github.com/IanWang/28965e13cdafdef4e11dc91f578d160d#tables
  const minColWidth = 3;

  const toRow = (cells) => '| ' + cells.join(' | ') + ' |';

  const rows = [];
  let headerDone = false;

  for (const tr of table.lines) {
    const cells = [];
    const emptyHeader = [];
    const separator = [];

    for (let tdIndex = 0; tdIndex < colWidths.length; tdIndex++) {
      const width = Math.max(minColWidth, colWidths[tdIndex]);
      const td = tr.lines[tdIndex];
      const cell = td ? td.content : '';
      cells.push(String(cell).padEnd(width, ' '));

      if (!headerDone) {
        emptyHeader.push(' '.repeat(width));
        separator.push('-'.repeat(width));
      }
    }

    // A table that does not start with a header row gets an empty one
    if (!headerDone && !tr.isHeader && colWidths.length > 0) {
      rows.push(toRow(emptyHeader));
      rows.push(toRow(separator));
      headerDone = true;
    }

    rows.push(toRow(cells));

    if (!headerDone) {
      rows.push(toRow(separator));
      headerDone = true;
    }
  }

  // Same result as joining with an empty string and splitting again when there is no row
  if (!rows.length) return [''];

  // Interleave the rows with NEWLINE tokens
  const output = [];
  rows.forEach((row, index) => {
    if (index > 0) output.push(NEWLINE);
    output.push(row);
  });
  return output;
}
2017-06-24 23:19:11 +00:00
/**
 * Converts the XML content of a note to Markdown.
 *
 * @param {Object} stream - Readable stream of the note content, passed to enexXmlToMdArray().
 * @param {Array} resources - Resources of the note, passed to enexXmlToMdArray().
 * @returns {Promise<string>} The Markdown text. The resources that remain after parsing are
 *   appended at the end as resource tags.
 */
async function enexXmlToMd(stream, resources) {
  const result = await enexXmlToMdArray(stream, resources);

  let mdLines = [];

  for (const line of result.content.lines) {
    if (typeof line === 'object') { // A table
      const table = setTableCellContent(line);
      const tableLines = drawTable(table, colWidths(table));
      mdLines.push(BLOCK_OPEN);
      mdLines = mdLines.concat(tableLines);
      mdLines.push(BLOCK_CLOSE);
    } else { // an actual line
      mdLines.push(line);
    }
  }

  result.resources.forEach((r, index) => {
    // One more newline before the first attachment
    if (index === 0) mdLines.push(NEWLINE);
    mdLines.push(NEWLINE);
    mdLines = addResourceTag(mdLines, r, r.filename);
  });

  return processMdArrayNewLines(mdLines);
}
2017-11-03 00:13:17 +00:00
// Public API of this module: the ENEX-to-Markdown converter, the token post-processing function,
// the NEWLINE marker and the resource tag helper.
module . exports = { enexXmlToMd , processMdArrayNewLines , NEWLINE , addResourceTag } ;