Clipper: Fixes #4105: Handle certain types of code block

pull/4187/head
Laurent Cozic 2020-12-02 15:43:44 +00:00
parent 5183767e43
commit 8d90cc234f
56 changed files with 12607 additions and 376 deletions

View File

@ -10,6 +10,8 @@ highlight.pack.js
Modules/TinyMCE/IconPack/postinstall.js
Modules/TinyMCE/JoplinLists/
Modules/TinyMCE/langs/
packages/turndown/
packages/turndown-plugin-gfm/
node_modules/
packages/lib/lib/lib.js
packages/lib/locales/index.js

View File

@ -32,7 +32,7 @@ describe('HtmlToMd', function() {
const htmlPath = `${basePath}/${htmlFilename}`;
const mdPath = `${basePath}/${filename(htmlFilename)}.md`;
// if (htmlFilename !== 'joplin_source_2.html') continue;
// if (htmlFilename !== 'code_3.html') continue;
// if (htmlFilename.indexOf('image_preserve_size') !== 0) continue;
@ -61,16 +61,30 @@ describe('HtmlToMd', function() {
}
if (actualMd !== expectedMd) {
console.info('');
console.info(`Error converting file: ${htmlFilename}`);
console.info('--------------------------------- Got:');
console.info(actualMd);
console.info('--------------------------------- Raw:');
console.info(actualMd.split('\n'));
console.info('--------------------------------- Expected:');
console.info(expectedMd.split('\n'));
console.info('--------------------------------------------');
console.info('');
const result = [];
result.push('');
result.push(`Error converting file: ${htmlFilename}`);
result.push('--------------------------------- Got:');
result.push(actualMd.split('\n').map(l => `"${l}"`).join('\n'));
result.push('--------------------------------- Expected:');
result.push(expectedMd.split('\n').map(l => `"${l}"`).join('\n'));
result.push('--------------------------------------------');
result.push('');
console.info(result.join('\n'));
// console.info('');
// console.info(`Error converting file: ${htmlFilename}`);
// console.info('--------------------------------- Got:');
// console.info(actualMd);
// console.info('--------------------------------- Raw:');
// console.info(actualMd.split('\n'));
// console.info('--------------------------------- Expected:');
// console.info(expectedMd.split('\n'));
// console.info('--------------------------------------------');
// console.info('');
expect(false).toBe(true);
// return;

View File

@ -0,0 +1 @@
<pre style="font-family: monospace;"><span>├── myproj_app<br>│ ├── api.py<br>│ └── Dockerfile</span></pre>

View File

@ -0,0 +1,5 @@
```
├── myproj_app
│ ├── api.py
│ └── Dockerfile
```

View File

@ -1,9 +1,9 @@
const TurndownService = require('joplin-turndown');
const TurndownService = require('@joplin/turndown');
const turndownPluginGfm = require('@joplin/turndown-plugin-gfm').gfm;
const markdownUtils = require('./markdownUtils').default;
class HtmlToMd {
parse(html, options = {}) {
const turndownPluginGfm = require('joplin-turndown-plugin-gfm').gfm;
const turndown = new TurndownService({
headingStyle: 'atx',
anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [],
@ -12,6 +12,7 @@ class HtmlToMd {
bulletListMarker: '-',
emDelimiter: '*',
strongDelimiter: '**',
br: '',
});
turndown.use(turndownPluginGfm);
turndown.remove('script');

File diff suppressed because it is too large Load Diff

View File

@ -17,14 +17,16 @@
},
"devDependencies": {
"@types/jest": "^26.0.15",
"jest": "^26.6.3",
"@types/node": "^14.14.6",
"jest": "^26.6.3",
"typescript": "^4.0.5"
},
"dependencies": {
"@joplin/fork-htmlparser2": "^4.1.8",
"@joplin/fork-sax": "^1.2.12",
"@joplin/renderer": "^1.0.17",
"@joplin/turndown": "^4.0.30",
"@joplin/turndown-plugin-gfm": "^1.0.12",
"async-mutex": "^0.1.3",
"aws-sdk": "^2.588.0",
"base-64": "^0.1.0",
@ -45,8 +47,6 @@
"image-data-uri": "^2.0.0",
"image-type": "^3.0.0",
"immer": "^7.0.14",
"joplin-turndown": "^4.0.30",
"joplin-turndown-plugin-gfm": "^1.0.12",
"levenshtein": "^1.0.5",
"lodash": "^4.17.20",
"markdown-it": "^10.0.0",

View File

@ -0,0 +1,5 @@
dist
lib
node_modules
npm-debug.log
test/*browser.js

View File

@ -0,0 +1,4 @@
language: node_js
node_js:
- "node"
- "6"

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Dom Christie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,53 @@
# turndown-plugin-gfm
A [Turndown](https://github.com/domchristie/turndown) plugin which adds GitHub Flavored Markdown extensions.
This is a fork of the original [turndown-plugin-gfm](https://github.com/domchristie/turndown-plugin-gfm) for use with [Joplin](https://github.com/laurent22/joplin). The changes are:
- New: Always render tables even if they don't have a header.
- New: Don't render the border of tables that contain other tables (frequent for websites that do the layout using tables). Only render the inner tables, if any, and if they also don't contain other tables.
- New: Replace newlines (`\n`) with `<br>` inside table cells so that multi-line content is displayed correctly as Markdown.
- New: Table cells are at least three characters long (padded with spaces) so that they render correctly in GFM-compliant renderers.
- New: Handle colspan in TD tags
- Fixed: Ensure there are no blank lines inside tables (due for example to an empty `<tr>` tag)
- Fixed: Fixed importing tables that contain pipes.
## Installation
npm:
```
npm install @joplin/turndown-plugin-gfm
```
## Usage
```js
// For Node.js
var TurndownService = require('@joplin/turndown')
var turndownPluginGfm = require('@joplin/turndown-plugin-gfm')
var gfm = turndownPluginGfm.gfm
var turndownService = new TurndownService()
turndownService.use(gfm)
var markdown = turndownService.turndown('<strike>Hello world!</strike>')
```
turndown-plugin-gfm is a suite of plugins which can be applied individually. The available plugins are as follows:
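- `highlightedCodeBlock` (for converting GitHub-style `<div class="highlight highlight-source-…"><pre>…</pre></div>` blocks into fenced code blocks)
- `strikethrough` (for converting `<strike>`, `<s>`, and `<del>` elements)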
- `tables`
- `taskListItems`
- `gfm` (which applies all of the above)
So for example, if you only wish to convert tables:
```js
var tables = require('@joplin/turndown-plugin-gfm').tables
var turndownService = new TurndownService()
turndownService.use(tables)
```
## License
turndown-plugin-gfm is copyright © 2017+ Dom Christie and released under the MIT license.

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Runs `npm run build` in the current working directory, then runs the
# HtmlToMd test suite of packages/app-cli (located relative to this script).
set -e
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
ROOT_DIR="$SCRIPT_DIR/../.."
npm run build
# Quoted so that a checkout path containing spaces is not word-split.
cd "$ROOT_DIR/packages/app-cli" && npm run test -- HtmlToMd

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Build variant: CommonJS output written to lib/turndown-plugin-gfm.browser.cjs.js.
export default config({
output: {
format: 'cjs',
file: 'lib/turndown-plugin-gfm.browser.cjs.js',
},
});

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Build variant: ES module output written to lib/turndown-plugin-gfm.browser.es.js.
export default config({
output: {
format: 'es',
file: 'lib/turndown-plugin-gfm.browser.es.js',
},
});

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Build variant: CommonJS output written to lib/turndown-plugin-gfm.cjs.js.
export default config({
output: {
format: 'cjs',
file: 'lib/turndown-plugin-gfm.cjs.js',
},
});

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Build variant: ES module output written to lib/turndown-plugin-gfm.es.js.
export default config({
output: {
format: 'es',
file: 'lib/turndown-plugin-gfm.es.js',
},
});

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Build variant: IIFE (script-tag) output written to dist/turndown-plugin-gfm.js.
export default config({
output: {
format: 'iife',
file: 'dist/turndown-plugin-gfm.js',
},
});

View File

@ -0,0 +1,7 @@
// Shared Rollup options for every build variant: only `output` differs per
// variant and is taken from the `config` argument.
export default (config) => ({
  name: 'turndownPluginGfm',
  input: 'src/gfm.js',
  output: config.output,
});

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,43 @@
{
"name": "@joplin/turndown-plugin-gfm",
"description": "Turndown plugin to add GitHub Flavored Markdown extensions.",
"version": "1.0.12",
"author": "Dom Christie",
"main": "lib/turndown-plugin-gfm.cjs.js",
"module": "lib/turndown-plugin-gfm.es.js",
"jsnext:main": "lib/turndown-plugin-gfm.es.js",
"devDependencies": {
"browserify": "^14.5.0",
"rollup": "^0.50.0",
"standard": "^10.0.3",
"turndown": "4.0.1",
"turndown-attendant": "0.0.2"
},
"files": [
"lib",
"dist"
],
"keywords": [
"turndown",
"turndown-plugin",
"html-to-markdown",
"html",
"markdown",
"github-flavored-markdown",
"gfm"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/laurent22/joplin-turndown-plugin-gfm.git"
},
"scripts": {
"build-all": "npm run build-cjs && npm run build-es && npm run build-iife",
"build": "rollup -c config/rollup.config.cjs.js",
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
"build-iife": "rollup -c config/rollup.config.iife.js",
"build-test": "browserify test/turndown-plugin-gfm-test.js --outfile test/turndown-plugin-gfm-test.browser.js",
"postinstall": "npm run build"
}
}

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Bumps the patch version, then publishes the package to npm.
# Abort on the first failure so that a failed version bump is never followed
# by a publish of the un-bumped version.
set -e
npm version patch
npm publish

View File

@ -0,0 +1,15 @@
import highlightedCodeBlock from './highlighted-code-block'
import strikethrough from './strikethrough'
import tables from './tables'
import taskListItems from './task-list-items'
/**
 * Turndown plugin that applies all the GFM sub-plugins of this package.
 *
 * @param {object} turndownService - Turndown service instance; its `use`
 *   method is called once with the array of sub-plugins.
 */
function gfm (turndownService) {
  var allPlugins = [
    highlightedCodeBlock,
    strikethrough,
    tables,
    taskListItems
  ]
  turndownService.use(allPlugins)
}
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems }

View File

@ -0,0 +1,25 @@
// Matches class names such as `highlight-source-js` or `highlight-text-html-basic`
// and captures the first language token (`js`, `html`).
const highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/

/**
 * Turndown plugin: converts `<div class="highlight-…"><pre>…</pre></div>`
 * blocks into fenced code blocks tagged with the language from the class name.
 *
 * @param {object} turndownService - Turndown service to register the rule on.
 */
export default function highlightedCodeBlock (turndownService) {
  // The rule applies to a DIV carrying a highlight class whose first child is a PRE.
  const isHighlightedBlock = function (node) {
    const pre = node.firstChild
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    )
  }

  // The PRE's text content is emitted verbatim between two fences.
  const toFencedBlock = function (content, node, options) {
    const match = (node.className || '').match(highlightRegExp)
    const language = match ? match[1] : ''
    const fence = options.fence
    return `\n\n${fence}${language}\n${node.firstChild.textContent}\n${fence}\n\n`
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightedBlock,
    replacement: toFencedBlock
  })
}

View File

@ -0,0 +1,8 @@
/**
 * Turndown plugin: wraps the content of `<del>`, `<s>` and `<strike>`
 * elements in single tildes.
 *
 * @param {object} turndownService - Turndown service to register the rule on.
 */
export default function strikethrough (turndownService) {
  const strikeRule = {
    filter: ['del', 's', 'strike'],
    replacement: (content) => `~${content}~`
  }
  turndownService.addRule('strikethrough', strikeRule)
}

View File

@ -0,0 +1,171 @@
// Array methods borrowed so they can be applied with `.call` to the
// array-like `childNodes` collections of DOM nodes.
var indexOf = Array.prototype.indexOf
var every = Array.prototype.every
// Turndown rules contributed by this plugin, keyed by rule name.
var rules = {}
// <th>/<td>: rendered as a pipe-delimited Markdown cell, unless the owning
// table is skipped, in which case the cell content is passed through as is.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    var owningTable = nodeParentTable(node)
    return tableShouldBeSkipped(owningTable) ? content : cell(content, node)
  }
}
// <tr>: each row is emitted on its own line. A heading row is additionally
// followed by the Markdown delimiter row, with one delimiter cell per table
// column, honouring the `align` attribute of the matching heading cell.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    const parentTable = nodeParentTable(node);
    if (tableShouldBeSkipped(parentTable)) return content;

    var borderCells = ''
    var alignMap = { left: ':--', right: '--:', center: ':-:' }

    if (isHeadingRow(node)) {
      const colCount = tableColCount(parentTable);
      for (var i = 0; i < colCount; i++) {
        // The heading row can have fewer cells than the widest row of the
        // table; columns without a heading cell get a plain `---` delimiter.
        const childNode = i < node.childNodes.length ? node.childNodes[i] : null;
        var border = '---'
        var align = childNode && childNode.getAttribute ? (childNode.getAttribute('align') || '').toLowerCase() : '';
        // Own-property check so that values such as align="constructor"
        // cannot pick up something inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) border = alignMap[align]
        // Built from the column index rather than from the heading cell, so
        // that a colspan on the heading cell cannot inject blank padding
        // cells into the delimiter row.
        borderCells += cell(border, null, i);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
}

rules.table = {
  // Every <table> is converted. A table without a heading row gets an empty
  // header row added so that the output is still a valid Markdown table.
  filter: function (node) {
    return node.nodeName === 'TABLE'
  },
  replacement: function (content, node) {
    if (tableShouldBeSkipped(node)) return content;

    // Ensure there are no blank lines
    content = content.replace(/\n+/g, '\n')

    // If table has no heading, add an empty one so as to get a valid Markdown table.
    // The divider check accepts every delimiter the row rule can emit for the
    // first column (`---`, `:--`, `--:`, `:-:`).
    var lines = content.trim().split('\n')
    var secondLine = lines.length >= 2 ? lines[1] : ''
    var secondLineIsDivider = /^\| (?:---|:--|--:|:-:)/.test(secondLine)

    var columnCount = tableColCount(node);
    var emptyHeader = ''
    if (columnCount && !secondLineIsDivider) {
      emptyHeader = '|' + ' |'.repeat(columnCount) + '\n' + '|' + ' --- |'.repeat(columnCount)
    }
    return '\n\n' + emptyHeader + content + '\n\n'
  }
}
// <thead>/<tbody>/<tfoot>: these wrappers add no Markdown of their own, so
// the already-converted rows are passed straight through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: (content) => content
}
// Tells whether a <tr> is the heading row of its table. This is the case when:
// - its parent is a THEAD,
// - or it is the first child of the TABLE, or of the first TBODY (possibly
//   following a blank THEAD), and every one of its cells is a TH.
function isHeadingRow (tr) {
  var container = tr.parentNode
  if (container.nodeName === 'THEAD') return true
  if (container.firstChild !== tr) return false
  if (container.nodeName !== 'TABLE' && !isFirstTbody(container)) return false
  return every.call(tr.childNodes, function (cellNode) {
    return cellNode.nodeName === 'TH'
  })
}
// Tells whether `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is blank.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false
  var before = element.previousSibling
  if (!before) return true
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Renders one Markdown table cell.
 *
 * @param {string} content - Cell content; it is trimmed, line breaks become
 *   `<br>`, pipes are escaped and the result is padded to at least three characters.
 * @param {?object} node - The cell's DOM node, used to work out the column
 *   index and the colspan padding. May be null when `index` is given.
 * @param {?number} index - Column index; when null it is derived from the
 *   position of `node` among its parent's child nodes.
 * @returns {string} The cell text, prefixed with `| ` for the first column
 *   (a single space otherwise) and terminated by ` |`.
 */
function cell (content, node = null, index = null) {
  if (index === null) index = indexOf.call(node.parentNode.childNodes, node)
  var prefix = index === 0 ? '| ' : ' '
  // Treat CRLF (and the reversed LFCR the original pattern looked for) as a
  // single line break; a stray carriage return must not stay inside the row.
  let filteredContent = content.trim().replace(/\r\n|\n\r|\r|\n/g, '<br>')
  // Escape each pipe individually so that runs such as `||` keep their length.
  filteredContent = filteredContent.replace(/\|/g, '\\|')
  while (filteredContent.length < 3) filteredContent += ' '
  if (node) filteredContent = handleColSpan(filteredContent, node, ' ')
  return prefix + filteredContent + ' |'
}
// Tells whether any descendant of `node` (at any depth) is a TABLE element.
// The node itself is not considered.
function nodeContainsTable(node) {
  const children = node.childNodes;
  if (!children) return false;
  return Array.prototype.some.call(children, function (child) {
    return child.nodeName === 'TABLE' || nodeContainsTable(child);
  });
}
// Decides whether a table should be skipped, i.e. not rendered as a Markdown
// table, so that each cell is rendered one after the other as if they were
// paragraphs. This is the case when there is no table node, when it exposes
// no `rows`, when it consists of a single cell, or when it contains another table.
function tableShouldBeSkipped(tableNode) {
  if (!tableNode || !tableNode.rows) return true;
  const rows = tableNode.rows;
  const isSingleCell = rows.length === 1 && rows[0].childNodes.length <= 1;
  if (isSingleCell) return true;
  return nodeContainsTable(tableNode) ? true : false;
}
/**
 * Finds the closest TABLE ancestor of a node.
 *
 * @param {object} node - DOM node to start from (the node itself is not tested).
 * @returns {?object} The nearest ancestor whose `nodeName` is `TABLE`, or
 *   null when the node has no parent or no TABLE ancestor.
 */
function nodeParentTable(node) {
  let parent = node.parentNode;
  // Test `parent` before reading `nodeName`, so a node with no parent yields
  // null instead of throwing.
  while (parent && parent.nodeName !== 'TABLE') {
    parent = parent.parentNode;
  }
  return parent || null;
}
// Appends one padding cell (` | ` followed by `emptyChar` three times) to
// `content` for every column beyond the first that the cell's `colspan`
// attribute covers. A missing or empty attribute counts as a span of 1.
function handleColSpan(content, node, emptyChar) {
  const span = node.getAttribute('colspan') || 1;
  let padded = content;
  let covered = 1;
  while (covered < span) {
    padded += ' | ' + emptyChar.repeat(3);
    covered++;
  }
  return padded
}
// Returns the largest number of child nodes found in any row of the table
// (0 when the table has no rows).
function tableColCount(node) {
  return Array.prototype.reduce.call(node.rows, function (widest, row) {
    const width = row.childNodes.length
    return width > widest ? width : widest
  }, 0)
}
/**
 * Turndown plugin: registers a keep filter for TABLE elements, then adds
 * every rule defined in `rules` under its key.
 *
 * @param {object} turndownService - Turndown service to configure.
 */
export default function tables (turndownService) {
  const isTable = (node) => node.nodeName === 'TABLE'
  turndownService.keep(isTable)
  for (const ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName])
  }
}

View File

@ -0,0 +1,10 @@
/**
 * Turndown plugin: renders a checkbox that is a direct child of an `<li>`
 * as a task-list marker (`[x] ` when checked, `[ ] ` otherwise).
 *
 * @param {object} turndownService - Turndown service to register the rule on.
 */
export default function taskListItems (turndownService) {
  const isListCheckbox = (node) => node.type === 'checkbox' && node.parentNode.nodeName === 'LI'
  const renderMarker = (content, node) => `${node.checked ? '[x]' : '[ ]'} `
  turndownService.addRule('taskListItems', {
    filter: isListCheckbox,
    replacement: renderMarker
  })
}

View File

@ -0,0 +1,323 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>turndown test runner</title>
<link rel="stylesheet" href="../node_modules/turndown-attendant/dist/styles.css">
</head>
<body>
<!-- TEST CASES -->
<div class="case" data-name="strike">
<div class="input"><strike>Lorem ipsum</strike></div>
<pre class="expected">~Lorem ipsum~</pre>
</div>
<div class="case" data-name="s">
<div class="input"><s>Lorem ipsum</s></div>
<pre class="expected">~Lorem ipsum~</pre>
</div>
<div class="case" data-name="del">
<div class="input"><del>Lorem ipsum</del></div>
<pre class="expected">~Lorem ipsum~</pre>
</div>
<div class="case" data-name="unchecked inputs">
<div class="input"><ul><li><input type=checkbox>Check Me!</li></ul></div>
<pre class="expected">* [ ] Check Me!</pre>
</div>
<div class="case" data-name="checked inputs">
<div class="input"><ul><li><input type=checkbox checked>Checked!</li></ul></div>
<pre class="expected">* [x] Checked!</pre>
</div>
<div class="case" data-name="basic table">
<div class="input">
<table>
<thead>
<tr>
<th>Column 1</th>
<th>Column 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Row 1, Column 1</td>
<td>Row 1, Column 2</td>
</tr>
<tr>
<td>Row 2, Column 1</td>
<td>Row 2, Column 2</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Column 1 | Column 2 |
| --- | --- |
| Row 1, Column 1 | Row 1, Column 2 |
| Row 2, Column 1 | Row 2, Column 2 |</pre>
</div>
<div class="case" data-name="cell alignment">
<div class="input">
<table>
<thead>
<tr>
<th align="left">Column 1</th>
<th align="center">Column 2</th>
<th align="right">Column 3</th>
<th align="foo">Column 4</th>
</tr>
</thead>
<tbody>
<tr>
<td>Row 1, Column 1</td>
<td>Row 1, Column 2</td>
<td>Row 1, Column 3</td>
<td>Row 1, Column 4</td>
</tr>
<tr>
<td>Row 2, Column 1</td>
<td>Row 2, Column 2</td>
<td>Row 2, Column 3</td>
<td>Row 2, Column 4</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Column 1 | Column 2 | Column 3 | Column 4 |
| :-- | :-: | --: | --- |
| Row 1, Column 1 | Row 1, Column 2 | Row 1, Column 3 | Row 1, Column 4 |
| Row 2, Column 1 | Row 2, Column 2 | Row 2, Column 3 | Row 2, Column 4 |</pre>
</div>
<div class="case" data-name="empty cells">
<div class="input">
<table>
<thead>
<tr>
<th align="left">Column 1</th>
<th align="center">Column 2</th>
<th align="right">Column 3</th>
<th align="foo">Column 4</th>
</tr>
</thead>
<tbody>
<tr>
<td></td>
<td>Row 1, Column 2</td>
<td>Row 1, Column 3</td>
<td>Row 1, Column 4</td>
</tr>
<tr>
<td>Row 2, Column 1</td>
<td></td>
<td>Row 2, Column 3</td>
<td>Row 2, Column 4</td>
</tr>
<tr>
<td>Row 3, Column 1</td>
<td>Row 3, Column 2</td>
<td></td>
<td>Row 3, Column 4</td>
</tr>
<tr>
<td>Row 4, Column 1</td>
<td>Row 4, Column 2</td>
<td>Row 4, Column 3</td>
<td></td>
</tr>
<tr>
<td></td>
<td></td>
<td></td>
<td>Row 5, Column 4</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Column 1 | Column 2 | Column 3 | Column 4 |
| :-- | :-: | --: | --- |
| | Row 1, Column 2 | Row 1, Column 3 | Row 1, Column 4 |
| Row 2, Column 1 | | Row 2, Column 3 | Row 2, Column 4 |
| Row 3, Column 1 | Row 3, Column 2 | | Row 3, Column 4 |
| Row 4, Column 1 | Row 4, Column 2 | Row 4, Column 3 | |
| | | | Row 5, Column 4 |</pre>
</div>
<div class="case" data-name="empty rows">
<div class="input">
<table>
<thead>
<td>Heading 1</td>
<td>Heading 2</td>
</thead>
<tbody>
<tr>
<td>Row 1</td>
<td>Row 1</td>
</tr>
<tr>
<td></td>
<td></td>
</tr>
<tr>
<td>Row 3</td>
<td>Row 3</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Heading 1 | Heading 2 |
| --- | --- |
| Row 1 | Row 1 |
| Row 3 | Row 3 |</pre>
</div>
<div class="case" data-name="th in first row">
<div class="input">
<table>
<tr>
<th>Heading</th>
</tr>
<tr>
<td>Content</td>
</tr>
</table>
</div>
<pre class="expected">| Heading |
| --- |
| Content |</pre>
</div>
<div class="case" data-name="th first row in tbody">
<div class="input">
<table>
<tbody>
<tr>
<th>Heading</th>
</tr>
<tr>
<td>Content</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Heading |
| --- |
| Content |</pre>
</div>
<div class="case" data-name="table with two tbodies">
<div class="input">
<table>
<tbody>
<tr>
<th>Heading</th>
</tr>
<tr>
<td>Content</td>
</tr>
</tbody>
<tbody>
<tr>
<th>Heading</th>
</tr>
<tr>
<td>Content</td>
</tr>
</tbody>
</table>
</div>
<pre class="expected">| Heading |
| --- |
| Content |
| Heading |
| Content |</pre>
</div>
<div class="case" data-name="heading cells in both thead and tbody">
<div class="input">
<table>
<thead><tr><th>Heading</th></tr></thead>
<tbody><tr><th>Cell</th></tr></tbody>
</table>
</div>
<pre class="expected">| Heading |
| --- |
| Cell |</pre>
</div>
<div class="case" data-name="empty head">
<div class="input">
<table>
<thead><tr><th></th></tr></thead>
<tbody><tr><th>Heading</th></tr></tbody>
</table>
</div>
<pre class="expected">| Heading |
| --- |</pre>
</div>
<div class="case" data-name="non-definitive heading row (converted but with empty header)">
<div class="input">
<table>
<tr><td>Row 1 Cell 1</td><td>Row 1 Cell 2</td></tr>
<tr><td>Row 2 Cell 1</td><td>Row 2 Cell 2</td></tr>
</table>
</div>
<pre class="expected">| | |
| --- | --- |
| Row 1 Cell 1 | Row 1 Cell 2 |
| Row 2 Cell 1 | Row 2 Cell 2 |</pre>
</div>
<div class="case" data-name="non-definitive heading row with th (converted but with empty header)">
<div class="input">
<table>
<tr>
<th>Heading</th>
<td>Not a heading</td>
</tr>
<tr>
<td>Heading</td>
<td>Not a heading</td>
</tr>
</table>
</div>
<pre class="expected">| | |
| --- | --- |
| Heading | Not a heading |
| Heading | Not a heading |</pre>
</div>
<div class="case" data-name="highlighted code block with html">
<div class="input">
<div class="highlight highlight-text-html-basic">
<pre>&lt;<span class="pl-ent">p</span>&gt;Hello world&lt;/<span class="pl-ent">p</span>&gt;</pre>
</div>
</div>
<pre class="expected">```html
&lt;p&gt;Hello world&lt;/p&gt;
```</pre>
</div>
<div class="case" data-name="highlighted code block with js">
<div class="input">
<div class="highlight highlight-source-js">
<pre>;(<span class="pl-k">function</span> () {})()</pre>
</div>
</div>
<pre class="expected">```js
;(function () {})()
```</pre>
</div>
<!-- /TEST CASES -->
<script src="turndown-plugin-gfm-test.browser.js"></script>
</body>
</html>

View File

@ -0,0 +1,13 @@
const Attendant = require('turndown-attendant');
const TurndownService = require('turndown');
const gfm = require('../lib/turndown-plugin-gfm.cjs').gfm;
// Runs the test cases declared in index.html (next to this file) through a
// Turndown service that has the gfm plugin applied before each case.
const attendant = new Attendant({
  file: `${__dirname}/index.html`,
  TurndownService,
  beforeEach(turndownService) {
    turndownService.use(gfm);
  },
});

attendant.run();

5
packages/turndown/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
dist
lib
node_modules
npm-debug.log
test/*browser.js

View File

@ -0,0 +1,2 @@
[test/index.html]
scopeAttributes = attr.keep-whitespace

View File

@ -0,0 +1,4 @@
language: node_js
node_js:
- "node"
- "6"

21
packages/turndown/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Dom Christie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

229
packages/turndown/README.md Normal file
View File

@ -0,0 +1,229 @@
# Turndown
[![Build Status](https://travis-ci.org/domchristie/turndown.svg?branch=master)](https://travis-ci.org/domchristie/turndown)
Convert HTML into Markdown with JavaScript.
## Modifications
**This is a mod of the original turndown package for use with Joplin.** The following changes have been made:
- Remove JavaScript code from links.
- Prevent newlines inside link text.
- Fixed ordered lists indentation when there are more than 9 items.
- Added support for `<picture>` tags.
- Fixed encoding of anchor URLs.
- Support named anchors (`<a href="#internal-link">Internal link</a>`, which would link to `<a id="internal-link"></a>`)
- Detect more types of code blocks based on special cases.
- Handle MathJax blocks
- Allow a rule to specify whether it wants its content escaped or not
- Handle [non-OL ordered lists](https://developer.mozilla.org/en-US/docs/Web/CSS/list-style-type)
- Added option `preserveImageTagsWithSize` to keep `<img/>` tags as HTML (no Markdown conversion) if they have width or height attributes
### to-markdown has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details.
## Installation
npm:
```
npm install @joplin/turndown
```
Browser:
```html
<script src="https://unpkg.com/turndown/dist/turndown.js"></script>
```
For usage with RequireJS, UMD versions are located in `lib/turndown.umd.js` (for Node.js) and `lib/turndown.browser.umd.js` for browser usage. These files are generated when the npm package is published. To generate them manually, clone this repo and run `npm run build`.
## Usage
```js
// For Node.js
var TurndownService = require('@joplin/turndown')
var turndownService = new TurndownService()
var markdown = turndownService.turndown('<h1>Hello world!</h1>')
```
Turndown also accepts DOM nodes as input (either element nodes, document nodes, or document fragment nodes):
```js
var markdown = turndownService.turndown(document.getElementById('content'))
```
## Options
Options can be passed in to the constructor on instantiation.
| Option | Valid values | Default |
| :-------------------- | :------------ | :------ |
| `headingStyle` | `setext` or `atx` | `setext` |
| `hr` | Any [Thematic break](http://spec.commonmark.org/0.27/#thematic-breaks) | `* * *` |
| `bulletListMarker` | `-`, `+`, or `*` | `*` |
| `codeBlockStyle` | `indented` or `fenced` | `indented` |
| `fence` | ` ``` ` or `~~~` | ` ``` ` |
| `emDelimiter` | `_` or `*` | `_` |
| `strongDelimiter` | `**` or `__` | `**` |
| `linkStyle` | `inlined` or `referenced` | `inlined` |
| `linkReferenceStyle` | `full`, `collapsed`, or `shortcut` | `full` |
### Advanced Options
| Option | Valid values | Default |
| :-------------------- | :------------ | :------ |
| `blankReplacement` | rule replacement function | See **Special Rules** below |
| `keepReplacement` | rule replacement function | See **Special Rules** below |
| `defaultReplacement` | rule replacement function | See **Special Rules** below |
## Methods
### `addRule(key, rule)`
The `key` parameter is a unique name for the rule for easy reference. Example:
```js
turndownService.addRule('strikethrough', {
filter: ['del', 's', 'strike'],
replacement: function (content) {
return '~' + content + '~'
}
})
```
`addRule` returns the `TurndownService` instance for chaining.
See **Extending with Rules** below.
### `keep(filter)`
Determines which elements are to be kept and rendered as HTML. By default, Turndown does not keep any elements. The filter parameter works like a rule filter (see the section on filters below). Example:
```js
turndownService.keep(['del', 'ins'])
turndownService.turndown('<p>Hello <del>world</del><ins>World</ins></p>') // 'Hello <del>world</del><ins>World</ins>'
```
This will render `<del>` and `<ins>` elements as HTML when converted.
`keep` can be called multiple times, with the newly added keep filters taking precedence over older ones. Keep filters will be overridden by the standard CommonMark rules and any added rules. To keep elements that are normally handled by those rules, add a rule with the desired behaviour.
`keep` returns the `TurndownService` instance for chaining.
### `remove(filter)`
Determines which elements are to be removed altogether, i.e. converted to an empty string. By default, Turndown does not remove any elements. The filter parameter works like a rule filter (see the section on filters below). Example:
```js
turndownService.remove('del')
turndownService.turndown('<p>Hello <del>world</del><ins>World</ins></p>') // 'Hello World'
```
This will remove `<del>` elements (and contents).
`remove` can be called multiple times, with the newly added remove filters taking precedence over older ones. Remove filters will be overridden by the keep filters, standard CommonMark rules, and any added rules. To remove elements that are normally handled by those rules, add a rule with the desired behaviour.
`remove` returns the `TurndownService` instance for chaining.
### `use(plugin|array)`
Use a plugin, or an array of plugins. Example:
```js
// Import plugins from turndown-plugin-gfm
var turndownPluginGfm = require('@joplin/turndown-plugin-gfm')
var gfm = turndownPluginGfm.gfm
var tables = turndownPluginGfm.tables
var strikethrough = turndownPluginGfm.strikethrough
// Use the gfm plugin
turndownService.use(gfm)
// Use the table and strikethrough plugins only
turndownService.use([tables, strikethrough])
```
`use` returns the `TurndownService` instance for chaining.
See **Plugins** below.
## Extending with Rules
Turndown can be extended by adding **rules**. A rule is a plain JavaScript object with `filter` and `replacement` properties. For example, the rule for converting `<p>` elements is as follows:
```js
{
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
}
```
The filter selects `<p>` elements, and the replacement function returns the `<p>` contents separated by two new lines.
### `filter` String|Array|Function
The filter property determines whether or not an element should be replaced with the rule's `replacement`. DOM nodes can be selected simply using a tag name or an array of tag names:
* `filter: 'p'` will select `<p>` elements
* `filter: ['em', 'i']` will select `<em>` or `<i>` elements
Alternatively, the filter can be a function that returns a boolean depending on whether a given node should be replaced. The function is passed a DOM node as well as the `TurndownService` options. For example, the following rule selects `<a>` elements (with an `href`) when the `linkStyle` option is `inlined`:
```js
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
}
```
### `replacement` Function
The replacement function determines how an element should be converted. It should return the Markdown string for a given node. The function is passed the node's content, the node itself, and the `TurndownService` options.
The following rule shows how `<em>` elements are converted:
```js
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
return options.emDelimiter + content + options.emDelimiter
}
}
```
### Special Rules
**Blank rule** determines how to handle blank elements. It overrides every rule (even those added via `addRule`). A node is blank if it only contains whitespace, and it's not an `<a>`, `<td>`,`<th>` or a void element. Its behaviour can be customised using the `blankReplacement` option.
**Keep rules** determine how to handle the elements that should not be converted, i.e. rendered as HTML in the Markdown output. By default, no elements are kept. Block-level elements will be separated from surrounding content by blank lines. Its behaviour can be customised using the `keepReplacement` option.
**Remove rules** determine which elements to remove altogether. By default, no elements are removed.
**Default rule** handles nodes which are not recognised by any other rule. By default, it outputs the node's text content (separated by blank lines if it is a block-level element). Its behaviour can be customised with the `defaultReplacement` option.
### Rule Precedence
Turndown iterates over the set of rules, and picks the first one that matches the `filter`. The following list describes the order of precedence:
1. Blank rule
2. Added rules (optional)
3. Commonmark rules
4. Keep rules
5. Remove rules
6. Default rule
## Plugins
The plugin API provides a convenient way for developers to apply multiple extensions. A plugin is just a function that is called with the `TurndownService` instance.
## License
turndown is copyright © 2017+ Dom Christie and released under the MIT license.

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Runs `npm run build` in the current directory, then runs the HtmlToMd tests
# from the app-cli package located two levels above this script.
set -e
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
ROOT_DIR="$SCRIPT_DIR/../.."
npm run build
# Quoted so that a checkout path containing spaces is not split into several words.
cd "$ROOT_DIR/packages/app-cli" && npm run test -- HtmlToMd

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: CommonJS ('cjs') bundle written to lib/turndown.browser.cjs.js.
// `browser: true` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'lib/turndown.browser.cjs.js',
format: 'cjs',
},
browser: true,
});

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: ES module ('es') bundle written to lib/turndown.browser.es.js.
// `browser: true` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'lib/turndown.browser.es.js',
format: 'es',
},
browser: true,
});

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: UMD ('umd') bundle written to lib/turndown.browser.umd.js.
// `browser: true` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'lib/turndown.browser.umd.js',
format: 'umd',
},
browser: true,
});

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: CommonJS ('cjs') bundle written to lib/turndown.cjs.js.
// `browser: false` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'lib/turndown.cjs.js',
format: 'cjs',
},
browser: false,
});

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: ES module ('es') bundle written to lib/turndown.es.js.
// `browser: false` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'lib/turndown.es.js',
format: 'es',
},
browser: false,
});

View File

@ -0,0 +1,9 @@
import config from './rollup.config';
// Rollup build definition: IIFE ('iife') bundle written to dist/turndown.js.
// `browser: true` is forwarded to the shared config factory imported from ./rollup.config.
export default config({
output: {
file: 'dist/turndown.js',
format: 'iife',
},
browser: true,
});

View File

@ -0,0 +1,17 @@
import commonjs from 'rollup-plugin-commonjs';
import replace from 'rollup-plugin-replace';
import resolve from 'rollup-plugin-node-resolve';
/**
 * Builds the Rollup options object shared by every bundle flavour.
 *
 * @param {Object} config
 * @param {Object} config.output - passed through unchanged as the Rollup `output` option
 * @param {boolean} [config.browser] - coerced to a boolean; its JSON form replaces
 *   every `process.browser` occurrence in the bundled source
 * @return {Object} Rollup options
 */
export default function(config) {
  const browserLiteral = JSON.stringify(!!config.browser);
  const plugins = [
    commonjs(),
    replace({ 'process.browser': browserLiteral }),
    resolve(),
  ];

  return {
    input: 'src/turndown.js',
    name: 'TurndownService',
    output: config.output,
    external: ['jsdom'],
    plugins,
  };
}

View File

@ -0,0 +1,8 @@
import config from './rollup.config';
// Rollup build definition: UMD ('umd') bundle written to lib/turndown.umd.js.
// `browser` is spelled out like in the other build definitions; the shared factory
// coerces a missing value to false, so the generated bundle is unchanged.
export default config({
  output: {
    file: 'lib/turndown.umd.js',
    format: 'umd',
  },
  browser: false,
});

View File

@ -0,0 +1,282 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Turndown Demo</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/normalize/7.0.0/normalize.min.css" />
<style>
.cf:before,
.cf:after {
content: " ";
display: table;
}
.cf:after {
clear: both;
}
.cf {
*zoom: 1;
}
* {
-webkit-box-sizing: border-box;
-moz-box-sizing: border-box;
box-sizing: border-box;
}
body {
background-color: #ccc;
margin: 0 auto;
font-size: 14px;
font-family: sans-serif;
line-height: 1.4;
color: #333;
}
header {
padding: 1em;
overflow: hidden;
background-color: #fff;
}
footer {
text-align: center;
color: #666;
text-shadow: 0 1px 0 #ddd;
}
a,
a:visited {
font-weight: 700;
text-decoration: none;
}
h1 {
float: left;
margin: 0;
font-size: 1em;
}
h2 {
color: #fff;
margin-bottom: 0;
}
.col,
.form-group {
padding: 0 10px;
}
.form-group {
float: left;
}
textarea {
width: 100%;
height: 600px;
margin: 0;
padding: .5em;
overflow: auto;
border: none;
background-color: #fff;
font-family: courier, monospace;
font-size: inherit;
color: inherit;
}
#input {
background: #333;
color: #fff;
}
.toolbar {
padding-top: 5px;
padding-bottom: 5px;
background-color: #e6e6e6;
}
select {
display: block;
width: 100%;
font-size: 14px;
}
@media (min-width: 768px) {
body {
font-size: 16px;
}
.col {
float: left;
width: 50%;
padding: 0 15px;
}
.row {
padding-right: 15px;
padding-left: 15px;
}
}
.form-group label {
font-size: 14px;
}
</style>
<script src="https://unpkg.com/turndown/dist/turndown.js"></script>
</head>
<body>
<header>
<h1>turndown</h1>
<a style="float: right" href="https://github.com/domchristie/turndown">Source on GitHub</a>
</header>
<div class="row cf">
<div class="col">
<h2>HTML</h2>
<textarea cols="100" rows=10 id="input"><h1>Turndown Demo</h1>
<p>This demonstrates <a href="https://github.com/domchristie/turndown">turndown</a> an HTML to Markdown converter in JavaScript.</p>
<h2>Usage</h2>
<pre><code class="language-js">var turndownService = new TurndownService()
console.log(
turndownService.turndown('&amp;lt;h1&amp;gt;Hello world&amp;lt;/h1&amp;gt;')
)</code></pre>
<hr />
<p>It aims to be <a href="http://commonmark.org/">CommonMark</a>
compliant, and includes options to style the output. These options include:</p>
<ul>
<li>headingStyle (setext or atx)</li>
<li>horizontalRule (*, -, or _)</li>
<li>bullet (*, -, or +)</li>
<li>codeBlockStyle (indented or fenced)</li>
<li>fence (` or ~)</li>
<li>emDelimiter (_ or *)</li>
<li>strongDelimiter (** or __)</li>
<li>linkStyle (inlined or referenced)</li>
<li>linkReferenceStyle (full, collapsed, or shortcut)</li>
</ul></textarea>
</div>
<div class="col">
<h2>Markdown</h2>
<textarea readonly cols="100" rows=10 id="output"></textarea>
</div>
</div>
<div class="row cf">
<form method="get" action="/turndown" id="options">
<div class="form-group">
<label for="headingStyle">Heading style</label>
<select name="headingStyle" id="headingStyle">
<option value="setext">setext</option>
<option value="atx">atx</option>
</select>
</div>
<div class="form-group">
<label for="hr">Horizontal rule</label>
<select name="hr" id="hr">
<option value="* * *">* * *</option>
<option value="- - -">- - -</option>
<option value="_ _ _">_ _ _</option>
</select>
</div>
<div class="form-group">
<label for="bulletListMarker">Bullet</label>
<select name="bulletListMarker" id="bulletListMarker">
<option value="*">*</option>
<option value="-">-</option>
<option value="+">+</option>
</select>
</div>
<div class="form-group">
<label for="codeBlockStyle">Code block style</label>
<select name="codeBlockStyle" id="codeBlockStyle">
<option value="indented">indented</option>
<option value="fenced">fenced</option>
</select>
</div>
<div class="form-group">
<label for="fence">Fence</label>
<select name="fence" id="fence">
<option value="```">```</option>
<option value="~~~">~~~</option>
</select>
</div>
<div class="form-group">
<label for="emDelimiter">Em delimiter</label>
<select name="emDelimiter" id="emDelimiter">
<option value="_">_</option>
<option value="*">*</option>
</select>
</div>
<div class="form-group">
<label for="strongDelimiter">Strong delimiter</label>
<select name="strongDelimiter" id="strongDelimiter">
<option value="**">**</option>
<option value="__">__</option>
</select>
</div>
<div class="form-group">
<label for="linkStyle">Link style</label>
<select name="linkStyle" id="linkStyle">
<option value="inlined">inlined</option>
<option value="referenced">referenced</option>
</select>
</div>
<div class="form-group">
<label for="linkReferenceStyle">Link reference style</label>
<select name="linkReferenceStyle" id="linkReferenceStyle">
<option value="full">full</option>
<option value="collapsed">collapsed</option>
<option value="shortcut">shortcut</option>
</select>
</div>
</form>
</div>
<footer><p>turndown is copyright © 2017 <a href="http://www.domchristie.co.uk/">Dom Christie</a> and is released under the MIT license</p></footer>
<script>
;(function () {
  var htmlField = document.getElementById('input')
  var markdownField = document.getElementById('output')
  var optionsForm = document.getElementById('options')
  var turndownService = new window.TurndownService(readOptions())

  // Re-convert whenever the HTML source is edited.
  htmlField.addEventListener('input', render)

  // Changing any option rebuilds the service with the new settings.
  optionsForm.addEventListener('change', function () {
    turndownService = new window.TurndownService(readOptions())
    render()
  })

  render()

  // Converts the current HTML input and shows the resulting Markdown.
  function render () {
    markdownField.value = turndownService.turndown(htmlField.value)
  }

  // Collects every <select> of the options form into a { name: value } object.
  function readOptions () {
    var selected = {}
    var selects = optionsForm.getElementsByTagName('select')
    for (var index = 0; index < selects.length; index++) {
      var select = selects[index]
      selected[select.name] = select.value
    }
    return selected
  }
})()
</script>
</body>
</html>

4528
packages/turndown/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
{
"name": "@joplin/turndown",
"description": "A library that converts HTML to Markdown",
"version": "4.0.30",
"author": "Dom Christie",
"main": "lib/turndown.cjs.js",
"module": "lib/turndown.es.js",
"jsnext:main": "lib/turndown.es.js",
"browser": {
"jsdom": false
},
"dependencies": {
"css": "^2.2.4",
"html-entities": "^1.2.1",
"jsdom": "^15.2.1"
},
"devDependencies": {
"browserify": "^14.5.0",
"rollup": "^0.50.0",
"rollup-plugin-commonjs": "^8.2.6",
"rollup-plugin-node-resolve": "^3.0.0",
"rollup-plugin-replace": "^2.0.0",
"standard": "^10.0.3",
"turndown-attendant": "0.0.3"
},
"files": [
"lib",
"dist"
],
"keywords": [
"converter",
"html",
"markdown"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/laurent22/joplin-turndown.git"
},
"scripts": {
"build-all": "npm run build-cjs && npm run build-es && npm run build-umd && npm run build-iife",
"build": "rollup -c config/rollup.config.cjs.js",
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
"build-umd": "rollup -c config/rollup.config.umd.js && rollup -c config/rollup.config.browser.umd.js",
"build-iife": "rollup -c config/rollup.config.iife.js",
"build-test": "browserify test/turndown-test.js --outfile test/turndown-test.browser.js",
"postinstall": "npm run build"
}
}

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Bumps the patch version, then publishes the package with npm.
# Stop at the first failing command so a failed version bump is never followed by a publish.
set -e
npm version patch
npm publish

View File

@ -0,0 +1,146 @@
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
 * Tells whether `text` is non-empty and made exclusively of ' ' characters.
 *
 * @param {string} text
 * @return {boolean} false for empty/falsy input or as soon as another character is found
 */
function containsOnlySpaces(text) {
  if (!text) return false;

  let position = 0;
  while (position < text.length) {
    if (text[position] !== ' ') return false;
    position += 1;
  }

  return true;
}
/**
* collapseWhitespace(options) removes extraneous whitespace from the given element.
*
* The descendants of `options.element` are visited with `next()` and modified in place:
* - in text and CDATA nodes, every run of spaces, CR, LF and tabs becomes a single space;
* - a leading space is dropped when there is no previous text node (or it already ends
*   with a space) and no void element was seen since the last block/BR;
* - text nodes that end up empty, and a spaces-only text node that follows another
*   spaces-only text node, are removed;
* - nodes that are neither text/CDATA nor elements are removed;
* - the trailing space of the last text node before a block element, a BR, or the end
*   of the walk is trimmed.
*
* Nothing is done when the element has no child or is itself matched by `isPre`.
*
* @param {Object} options
* @param {Node} options.element Root node whose descendants are processed
* @param {Function} options.isBlock Called with an element node; truthy for block elements
* @param {Function} options.isVoid Called with an element node; truthy for void elements
* @param {Function} [options.isPre] Called with a node; defaults to `nodeName === 'PRE'`
*/
function collapseWhitespace (options) {
var element = options.element
var isBlock = options.isBlock
var isVoid = options.isVoid
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
}
if (!element.firstChild || isPre(element)) return
// Last text node kept so far; reset to null by block, BR and void elements.
var prevText = null
// True once a non-block, non-BR void element was seen; reset by block and BR elements.
var prevVoid = false
// Previously visited node, used by next() to decide the traversal direction.
var prev = null
var node = next(prev, element, isPre)
// We keep track of whether the previous text was only spaces or not. This prevents the case where multiple empty blocks are
// added, which results in multiple spaces. These spaces are then incorrectly interpreted as a code block by renderers.
// So by keeping track of this, we make sure that only one space at most is added.
var prevTextIsOnlySpaces = false;
// The walk ends when next()/remove() hand back the root element itself.
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ')
if ((!prevText || / $/.test(prevText.data)) &&
!prevVoid && text[0] === ' ') {
text = text.substr(1)
}
var textIsOnlySpaces = containsOnlySpaces(text);
// `text` might be empty at this point.
if (!text || (textIsOnlySpaces && prevTextIsOnlySpaces)) {
// remove() returns the node to continue from, so the advance step below is skipped.
node = remove(node)
continue
}
prevTextIsOnlySpaces = textIsOnlySpaces;
node.data = text
prevText = node
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
// Text right before a block or a line break must not end with a space.
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '')
}
prevText = null
prevVoid = false
} else if (isVoid(node)) {
// Avoid trimming space around non-block, non-BR void elements.
prevText = null
prevVoid = true
}
} else {
// Any other node type is dropped from the tree.
node = remove(node)
continue
}
var nextNode = next(prev, node, isPre)
prev = node
node = nextNode
}
// Trim the trailing space of the last text node and drop the node if nothing is left.
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '')
if (!prevText.data) {
remove(prevText)
}
}
}
/**
 * remove(node) detaches the given node from its parent and returns the node
 * the traversal should continue with: the next sibling when there is one,
 * the parent otherwise.
 *
 * @param {Node} node
 * @return {Node} node
 */
function remove (node) {
  var parent = node.parentNode
  // Resolved before the removal, while the sibling link is still available.
  var successor = node.nextSibling || parent
  parent.removeChild(node)
  return successor
}
/**
 * next(prev, current, isPre) returns the node to visit after `current`.
 *
 * Children are skipped when the walk has just come back up from one of them
 * (`prev` is a child of `current`) or when `current` is matched by `isPre`.
 *
 * @param {Node} prev
 * @param {Node} current
 * @param {Function} isPre
 * @return {Node}
 */
function next (prev, current, isPre) {
  var skipChildren = (prev && prev.parentNode === current) || isPre(current)
  if (!skipChildren && current.firstChild) return current.firstChild
  return current.nextSibling || current.parentNode
}
export default collapseWhitespace

View File

@ -0,0 +1,601 @@
import { repeat, isCodeBlockSpecialCase1, isCodeBlockSpecialCase2, isCodeBlock } from './utilities'
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = (new Entities()).encode;
/**
 * Serialises a collection of attributes as `name="value"` pairs separated by spaces.
 * Values go through `htmlentities`; names are written as they are.
 *
 * @param {Iterable} attributes - items exposing `name` and `value`
 * @param {Object} [options]
 * @param {boolean} [options.skipEmptyClass=false] - leave out a `class` attribute whose value is empty
 * @return {string} '' when `attributes` is falsy
 */
function attributesHtml(attributes, options = null) {
  if (!attributes) return '';

  const settings = Object.assign({ skipEmptyClass: false }, options);
  const pairs = [];

  for (const attribute of attributes) {
    const isEmptyClass = attribute.name === 'class' && !attribute.value;
    if (isEmptyClass && settings.skipEmptyClass) continue;
    pairs.push(`${attribute.name}="${htmlentities(attribute.value)}"`);
  }

  return pairs.join(' ');
}
// Conversion rules, stored under their rule name.
var rules = {}

// <p>: the content surrounded by blank lines.
rules.paragraph = {
  filter: 'p',
  replacement (content) {
    return `\n\n${content}\n\n`
  }
}
// <br>: the configured `br` marker followed by a newline.
rules.lineBreak = {
  filter: 'br',
  replacement (content, node, options) {
    return `${options.br}\n`
  }
}
// <h1>..<h6>: setext underline for levels 1 and 2 when `headingStyle` is 'setext',
// a '#' prefix repeated once per level in every other case.
rules.heading = {
  filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
  replacement (content, node, options) {
    var level = Number(node.nodeName.charAt(1))
    var useSetext = options.headingStyle === 'setext' && level < 3

    if (!useSetext) {
      return '\n\n' + repeat('#', level) + ' ' + content + '\n\n'
    }

    var underlineChar = level === 1 ? '=' : '-'
    return '\n\n' + content + '\n' + repeat(underlineChar, content.length) + '\n\n'
  }
}
// <blockquote>: surrounding newlines are stripped, then every line gets a '> ' prefix.
rules.blockquote = {
  filter: 'blockquote',
  replacement (content) {
    var quoted = content
      .replace(/^\n+|\n+$/g, '')
      .replace(/^/gm, '> ')
    return `\n\n${quoted}\n\n`
  }
}
// <ul>/<ol>: a list that is the last element child of an <li> is only preceded by
// one newline; any other list is surrounded by blank lines.
rules.list = {
  filter: ['ul', 'ol'],
  replacement (content, node) {
    var container = node.parentNode
    var closesListItem = container.nodeName === 'LI' && container.lastElementChild === node
    return closesListItem ? '\n' + content : '\n\n' + content + '\n\n'
  }
}
// An <ol> is an ordered list, and so is any other element styled with
// "list-style-type: decimal", since browsers render it as a numbered list.
// https://developer.mozilla.org/en-US/docs/Web/CSS/list-style-type
function isOrderedList(e) {
  return e.nodeName === 'OL'
    ? true
    : e.style && e.style.listStyleType === 'decimal';
}
// <li>: emits one list line. The prefix is, in order of priority, a Joplin checkbox
// marker, an ordered-list number, or the configured bullet marker.
rules.listItem = {
  filter: 'li',
  replacement: function (content, node, options) {
    content = content
      .replace(/^\n+/, '') // remove leading newlines
      .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
    var prefix = options.bulletListMarker + ' '
    content = content.replace(/\n/gm, '\n ') // indent
    const joplinCheckbox = joplinCheckboxInfo(node);
    if (joplinCheckbox) {
      prefix = '- [' + (joplinCheckbox.checked ? 'x' : ' ') + '] ';
    } else {
      var parent = node.parentNode
      if (isOrderedList(parent)) {
        var start = parent.getAttribute('start')
        var index = Array.prototype.indexOf.call(parent.children, node)
        var indexStr = (start ? Number(start) + index : index + 1) + ''
        // The number is padded so that one- and two-digit items share the same
        // content column ("9." gets two spaces, "10." gets one). At least one space
        // is always kept: without it an item numbered 100 or more would be written
        // as "100.text", and a four-digit number would make String.prototype.repeat
        // throw a RangeError because of the negative count.
        prefix = indexStr + '.' + ' '.repeat(Math.max(1, 3 - indexStr.length))
      }
    }
    return (
      prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
    )
  }
}
// Code blocks when `codeBlockStyle` is 'indented': the text content of the code
// node is emitted with every line indented.
rules.indentedCodeBlock = {
  filter (node, options) {
    return options.codeBlockStyle === 'indented' ? isCodeBlock(node) : false
  },
  replacement (content, node, options) {
    // In special case 1 the matched node holds the code itself, otherwise its first child does.
    const codeNode = isCodeBlockSpecialCase1(node) ? node : node.firstChild
    const indentedCode = codeNode.textContent.replace(/\n/g, '\n ')
    return '\n\n ' + indentedCode + '\n\n'
  }
}
// Code blocks when `codeBlockStyle` is 'fenced': the content is wrapped between two
// fences, the opening one being followed by the language found in a
// `language-xxx` class name, if any.
rules.fencedCodeBlock = {
  filter (node, options) {
    return options.codeBlockStyle === 'fenced' ? isCodeBlock(node) : false;
  },
  replacement (content, node, options) {
    const firstChild = node.firstChild;
    // In both special cases the class name is read from the matched node itself.
    const useNodeItself = isCodeBlockSpecialCase1(node) || isCodeBlockSpecialCase2(node);
    const handledNode = useNodeItself ? node : firstChild;
    const languageMatch = (handledNode.className || '').match(/language-(\S+)/);
    const language = languageMatch ? languageMatch[1] : '';
    return `\n\n${options.fence}${language}\n${content}\n${options.fence}\n\n`;
  }
}
// <hr>: the configured `hr` marker surrounded by blank lines.
rules.horizontalRule = {
  filter: 'hr',
  replacement (content, node, options) {
    return `\n\n${options.hr}\n\n`
  }
}
// Prepares a link label: surrounding whitespace is removed and every run of
// line breaks is replaced with a single <br>.
function filterLinkContent (content) {
  var trimmed = content.trim()
  return trimmed.replace(/[\n\r]+/g, '<br>')
}
// Cleans up a link target before it is written to Markdown.
// Returns '' for an empty href and for javascript: URLs (no JS code is kept in the Markdown).
function filterLinkHref (href) {
  if (!href) return ''

  var trimmed = href.trim()
  if (trimmed.toLowerCase().indexOf('javascript:') === 0) return ''

  return trimmed
    // Spaces can cause problems for some renderers and are not valid URL characters anyway.
    .replace(/ /g, '%20')
    // Brackets are escaped as well.
    .replace(/\(/g, '%28')
    .replace(/\)/g, '%29')
}
// Prepares an image title: it is trimmed, and double quotes and brackets are
// replaced with HTML entities.
function filterImageTitle(title) {
  if (!title) return ''

  return title
    .trim()
    .replace(/\"/g, '&quot;')
    .replace(/\(/g, '&#40;')
    .replace(/\)/g, '&#41;')
}
// Returns an empty `<a id="...">` anchor for the node when its `id` (or, failing
// that, its `name`) is listed in `options.anchorNames`; returns '' otherwise.
// The lookup uses the trimmed, lower-cased identifier.
function getNamedAnchorFromLink(node, options) {
  let anchorId = node.getAttribute('id') || node.getAttribute('name');
  if (anchorId) anchorId = anchorId.trim();

  const isKnownAnchor = anchorId && options.anchorNames.indexOf(anchorId.toLowerCase()) >= 0;
  if (!isKnownAnchor) return '';

  return '<a id="' + htmlentities(anchorId) + '"></a>';
}
// <a> when `linkStyle` is 'inlined'. Matches anchors that carry an href, a name or an id.
rules.inlineLink = {
  filter: function (node, options) {
    return (
      options.linkStyle === 'inlined' &&
      node.nodeName === 'A' &&
      (node.getAttribute('href') || node.getAttribute('name') || node.getAttribute('id'))
    )
  },
  replacement: function (content, node, options) {
    var href = filterLinkHref(node.getAttribute('href'))
    var anchor = getNamedAnchorFromLink(node, options)

    // Without a usable target only the optional named anchor and the label are kept.
    if (!href) return anchor + filterLinkContent(content)

    // A title identical to the target adds no information, so it is left out.
    var title = node.title && node.title !== href ? ' "' + node.title + '"' : ''
    return anchor + '[' + filterLinkContent(content) + '](' + href + title + ')'
  }
}
// A named anchor is usually written <a name="something"></a>, but pages also use
// things like <span id="something">Something</span>. This rule covers any node
// for which a named anchor can be produced.
// Fixes https://github.com/laurent22/joplin/issues/1876
rules.otherNamedAnchors = {
  filter (node, options) {
    return !!getNamedAnchorFromLink(node, options);
  },
  replacement (content, node, options) {
    return getNamedAnchorFromLink(node, options) + content;
  }
}
// <a href> when `linkStyle` is 'referenced': the link is written as a reference and
// its definition is stored in `references` until `append` returns the whole list.
rules.referenceLink = {
  filter (node, options) {
    return (
      options.linkStyle === 'referenced' &&
      node.nodeName === 'A' &&
      node.getAttribute('href')
    )
  },

  replacement (content, node, options) {
    var href = filterLinkHref(node.getAttribute('href'))
    // No title is written when the target was filtered out.
    var title = href && node.title ? ' "' + node.title + '"' : ''
    var label = filterLinkContent(content)
    var style = options.linkReferenceStyle
    var replacement
    var reference

    if (style === 'collapsed') {
      replacement = '[' + label + '][]'
      reference = '[' + label + ']: ' + href + title
    } else if (style === 'shortcut') {
      replacement = '[' + label + ']'
      reference = '[' + label + ']: ' + href + title
    } else {
      // Full style: references are numbered in order of appearance.
      var id = this.references.length + 1
      replacement = '[' + label + '][' + id + ']'
      reference = '[' + id + ']: ' + href + title
    }

    this.references.push(reference)
    return replacement
  },

  references: [],

  append (options) {
    if (!this.references.length) return ''

    var block = '\n\n' + this.references.join('\n') + '\n\n'
    this.references = [] // Reset references
    return block
  }
}
// <em>/<i>: content wrapped in the emphasis delimiter. Blank content is dropped and
// content of nodes flagged as code is returned untouched.
rules.emphasis = {
  filter: ['em', 'i'],
  replacement (content, node, options) {
    if (content.trim() === '') return ''
    return node.isCode
      ? content
      : options.emDelimiter + content + options.emDelimiter
  }
}
// <strong>/<b>: content wrapped in the strong delimiter. Blank content is dropped and
// content of nodes flagged as code is returned untouched.
rules.strong = {
  filter: ['strong', 'b'],
  replacement (content, node, options) {
    if (content.trim() === '') return ''
    return node.isCode
      ? content
      : options.strongDelimiter + content + options.strongDelimiter
  }
}
// Inline <code>: every <code> element except one that is the only child of a <pre>.
rules.code = {
  filter (node) {
    var isOnlyChild = !(node.previousSibling || node.nextSibling)
    var wrappedInPre = node.parentNode.nodeName === 'PRE' && isOnlyChild
    return node.nodeName === 'CODE' && !wrappedInPre
  },
  replacement (content) {
    if (!content.trim()) return ''

    var backtickRuns = content.match(/`+/gm)
    var delimiter = '`'
    var leadingSpace = ''
    var trailingSpace = ''

    if (backtickRuns) {
      // Keep a backtick at either end of the content apart from the delimiter.
      if (/^`/.test(content)) leadingSpace = ' '
      if (/`$/.test(content)) trailingSpace = ' '
      // Lengthen the delimiter until it differs from every backtick run in the content.
      while (backtickRuns.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
    }

    return delimiter + leadingSpace + content + trailingSpace + delimiter
  }
}
/**
 * Converts an image node to Markdown image syntax.
 *
 * @param {Element} node
 * @param {Object} [options]
 * @param {boolean} [options.preserveImageTagsWithSize=false] - return the node's own
 *   HTML when it has a `width` or `height` attribute
 * @return {string} '' when the node has no usable `src`
 */
function imageMarkdownFromNode(node, options = null) {
  const settings = Object.assign({ preserveImageTagsWithSize: false }, options);

  if (settings.preserveImageTagsWithSize && (node.getAttribute('width') || node.getAttribute('height'))) {
    return node.outerHTML;
  }

  const src = filterLinkHref(node.getAttribute('src') || '');
  if (!src) return '';

  // Square brackets inside the alternative text are backslash-escaped.
  const alt = (node.alt || '').replace(/([[\]])/g, '\\$1');
  const title = node.title || '';
  const titlePart = title ? ' "' + filterImageTitle(title) + '"' : '';
  return '![' + alt + '](' + src + titlePart + ')';
}
/**
 * Returns the URL of the first image candidate listed in the `srcset` attribute
 * (or, when that one is missing, the `data-srcset` attribute) of a node.
 *
 * Format of srcset can be:
 *   srcset="kitten.png"
 * or:
 *   srcset="kitten.png, kitten@2X.png 2x"
 *
 * Whitespace around the candidate is ignored and any whitespace (not only a
 * plain space) separates the URL from its descriptor, so an attribute spread
 * over several lines still yields the URL instead of an empty string.
 *
 * @param {Element} node
 * @return {string} the URL, or '' when there is none
 */
function imageUrlFromSource(node) {
  const srcset = node.getAttribute('srcset') || node.getAttribute('data-srcset');
  if (!srcset) return '';

  const firstCandidate = srcset.split(',')[0].trim();
  return firstCandidate.split(/\s+/)[0];
}
// <img>: delegated to imageMarkdownFromNode.
rules.image = {
  filter: 'img',
  replacement: (content, node, options) => imageMarkdownFromNode(node, options)
}
// <picture>: uses the <img> child when it has a `src`, and falls back to the first
// <source> child otherwise.
rules.picture = {
  filter: 'picture',
  replacement (content, node, options) {
    if (!node.childNodes) return '';

    let firstSource = null;
    let img = null;
    Array.prototype.forEach.call(node.childNodes, (child) => {
      if (child.nodeName === 'SOURCE' && !firstSource) firstSource = child;
      if (child.nodeName === 'IMG') img = child;
    });

    if (img && img.getAttribute('src')) return imageMarkdownFromNode(img, options);
    if (!firstSource) return '';

    // A <picture> tag can have several <source> tags, among which the browser is
    // supposed to choose; for now the first one is picked.
    const src = imageUrlFromSource(firstSource);
    return src ? '![](' + src + ')' : '';
  }
}
/**
 * Depth-first search for the first descendant of `node` that matches.
 *
 * @param {Node} node - node whose `childNodes` are searched recursively
 * @param {string} byType - 'class' to match a class name, 'nodeName' to match the node name
 * @param {string} name - class name or node name to look for
 * @return {Node|null} the first match in document order, or null
 */
function findFirstDescendant(node, byType, name) {
  for (const childNode of node.childNodes) {
    // Some child nodes (text nodes for instance) have no classList, so it is
    // checked before use, as findParent does.
    if (byType === 'class' && childNode.classList && childNode.classList.contains(name)) return childNode;
    if (byType === 'nodeName' && childNode.nodeName === name) return childNode;
    const sub = findFirstDescendant(childNode, byType, name);
    if (sub) return sub;
  }
  return null;
}
/**
 * Walks up the ancestors of `node` and returns the closest one that matches.
 *
 * @param {Node} node
 * @param {string} byType - 'class' to match a class name, 'nodeName' to match the node name
 * @param {string} name - class name or node name to look for
 * @return {Node|null} the matching ancestor, or null when the top is reached
 */
function findParent(node, byType, name) {
  for (let ancestor = node.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (byType === 'class' && ancestor.classList && ancestor.classList.contains(name)) return ancestor;
    if (byType === 'nodeName' && ancestor.nodeName === name) return ancestor;
  }
  return null;
}
// ===============================================================================
// MATHJAX support
//
// When encountering Mathjax elements there's first the rendered Mathjax,
// which we want to skip because it cannot be converted reliably to Markdown.
// This tag is followed by the actual MathJax script in a <script> tag, which
// is what we want to export. By wrapping this text in "$" or "$$" it will
// be displayed correctly by Katex in Joplin.
//
// See mathjax_inline and mathjax_block test cases.
// ===============================================================================
// Classifies a MathJax <script> node from its `type` attribute: 'block' when the
// type mentions "display", 'inline' otherwise. Returns null for any node that is
// not a <script> whose type contains "math/tex".
function majaxScriptBlockType(node) {
  if (node.nodeName !== 'SCRIPT') return null;

  const type = node.getAttribute('type');
  const isTex = Boolean(type) && type.indexOf('math/tex') >= 0;
  if (!isTex) return null;

  if (type.indexOf('display') >= 0) return 'block';
  return 'inline';
}
// <span class="MathJax">: dropped from the output.
rules.mathjaxRendered = {
  filter (node) {
    if (node.nodeName !== 'SPAN') return false;
    return node.getAttribute('class') === 'MathJax';
  },
  replacement: () => ''
}
// Inline MathJax script: the script content wrapped in single "$".
rules.mathjaxScriptInline = {
  filter (node) {
    return majaxScriptBlockType(node) === 'inline';
  },
  escapeContent () {
    // The raw, unescaped content is wanted since this is what Katex needs to
    // render. Escaping would, in particular, double the \\.
    return false;
  },
  replacement (content, node, options) {
    return `$${content}$`;
  }
}
// Block MathJax script: the unescaped script content on its own lines between "$$" markers.
rules.mathjaxScriptBlock = {
  filter (node) {
    return majaxScriptBlockType(node) === 'block';
  },
  escapeContent () {
    return false;
  },
  replacement (content, node, options) {
    return `$$\n${content}\n$$`;
  }
}
// ===============================================================================
// End of MATHJAX support
// ===============================================================================
// ===============================================================================
// Joplin "noMdConv" support
//
// Tags that have the class "jop-noMdConv" are not converted to Markdown
// but left as HTML. This is useful when converting from MD to HTML, then
// back to MD again. In that case, we'd want to preserve the code that
// was in HTML originally.
// ===============================================================================
// Nodes carrying the "jop-noMdConv" class are written back as HTML: the marker class
// is removed from the node, then the tag is rebuilt with its remaining attributes
// around the converted content.
rules.joplinHtmlInMarkdown = {
  filter (node) {
    return node && node.classList && node.classList.contains('jop-noMdConv');
  },
  replacement (content, node) {
    node.classList.remove('jop-noMdConv');
    const tagName = node.nodeName.toLowerCase();
    const attributes = attributesHtml(node.attributes, { skipEmptyClass: true });
    const openingTag = attributes ? `<${tagName} ${attributes}>` : `<${tagName}>`;
    return `${openingTag}${content}</${tagName}>`;
  }
}
// ===============================================================================
// Joplin Source block support
//
// This is specific to Joplin: a plugin may convert some Markdown to HTML
// but keep the original source in a hidden <PRE class="joplin-source"> block.
// In that case, when we convert back again from HTML to MD, we use that
// block for lossless conversion.
// ===============================================================================
/**
 * Extracts the original source kept inside a "joplin-editable" block.
 *
 * @param {Node} node
 * @return {Object|null} `{ openCharacters, closeCharacters, content }` read from the
 *   first direct child that has the "joplin-source" class, or null when the node is
 *   not a "joplin-editable" block or has no such child
 */
function joplinEditableBlockInfo(node) {
  if (!node.classList || !node.classList.contains('joplin-editable')) return null;
  let sourceNode = null;
  for (const childNode of node.childNodes) {
    // Children without a classList (text nodes for instance) are skipped
    // instead of making the lookup throw.
    if (childNode.classList && childNode.classList.contains('joplin-source')) {
      sourceNode = childNode;
      break;
    }
  }
  if (!sourceNode) return null;
  return {
    openCharacters: sourceNode.getAttribute('data-joplin-source-open'),
    closeCharacters: sourceNode.getAttribute('data-joplin-source-close'),
    content: sourceNode.textContent,
  };
}
// "joplin-editable" blocks: the stored source is written between its recorded
// opening and closing characters, without escaping.
rules.joplinSourceBlock = {
  filter (node) {
    return !!joplinEditableBlockInfo(node);
  },
  escapeContent () {
    return false;
  },
  replacement (content, node, options) {
    const info = joplinEditableBlockInfo(node);
    return info
      ? info.openCharacters + info.content + info.closeCharacters
      : undefined;
  }
}
// ===============================================================================
// Checkboxes
// ===============================================================================
/**
 * Describes the checkbox attached to a list item, if any.
 *
 * @param {Element} liNode
 * @return {Object|null} `{ checked, renderingType }`, or null when the item is not a
 *   checkbox. `renderingType` is 1 for items with the "joplin-checkbox" class and 2
 *   for items that have an ancestor with the "joplin-checklist" class.
 */
function joplinCheckboxInfo(liNode) {
  if (liNode.classList.contains('joplin-checkbox')) {
    // Handling of this rendering is buggy as it adds extra new lines between each
    // list item. However, supporting this rendering is normally no longer needed.
    const input = findFirstDescendant(liNode, 'nodeName', 'INPUT');
    return {
      // `checked` is a boolean attribute: its presence is what matters, and its value
      // may be the empty string (as in `<input checked>`), which a truthiness test
      // on getAttribute() would read as unchecked.
      checked: input && input.hasAttribute ? input.hasAttribute('checked') : false,
      renderingType: 1,
    };
  }
  const parentChecklist = findParent(liNode, 'class', 'joplin-checklist');
  if (parentChecklist) {
    return {
      checked: !!liNode.classList && liNode.classList.contains('checked'),
      renderingType: 2,
    };
  }
  return null;
}
export default rules

View File

@ -0,0 +1,68 @@
/*
 * Object on which DOMParser is looked up: `window` when it is defined,
 * an empty object otherwise (Node.js)
 */
var root = typeof window === 'undefined' ? {} : window

/*
 * Parsing HTML strings
 */
function canParseHTMLNatively () {
  var Parser = root.DOMParser
  // Adapted from https://gist.github.com/1129031
  try {
    // WebKit returns null on unsupported types
    return Boolean(new Parser().parseFromString('', 'text/html'))
  } catch (e) {
    // Firefox/Opera/IE throw errors on unsupported types
    return false
  }
}
/*
 * Builds a constructor whose instances expose parseFromString(string) and return a
 * document. Which implementation is installed depends on `process.browser`:
 * an ActiveX 'htmlfile' document or document.implementation.createHTMLDocument
 * when it is truthy, jsdom otherwise.
 */
function createHTMLParser () {
var Parser = function () {}
if (process.browser) {
if (shouldUseActiveX()) {
// Used when opening a document created with createHTMLDocument throws and ActiveXObject exists.
Parser.prototype.parseFromString = function (string) {
var doc = new window.ActiveXObject('htmlfile')
doc.designMode = 'on' // disable on-page scripts
doc.open()
doc.write(string)
doc.close()
return doc
}
} else {
// The markup is written into a new, empty HTML document.
Parser.prototype.parseFromString = function (string) {
var doc = document.implementation.createHTMLDocument('')
doc.open()
doc.write(string)
doc.close()
return doc
}
}
} else {
// jsdom is only required in this branch.
var JSDOM = require('jsdom').JSDOM
Parser.prototype.parseFromString = function (string) {
return new JSDOM(string).window.document
}
}
return Parser
}
/*
 * True when opening a document created with createHTMLDocument throws and
 * window.ActiveXObject is available.
 */
function shouldUseActiveX () {
  try {
    document.implementation.createHTMLDocument('').open()
  } catch (e) {
    return Boolean(window.ActiveXObject)
  }
  return false
}
export default canParseHTMLNatively() ? root.DOMParser : createHTMLParser()

View File

@ -0,0 +1,60 @@
import { isBlock, isVoid, hasVoid, isCodeBlock } from './utilities'
/**
 * Decorates a DOM node with the flags used during conversion and returns that same node:
 * `isBlock`, `isCode` (a <code> element, a node whose parent is flagged as code, or a
 * code block), `isBlank` and `flankingWhitespace`.
 *
 * @param {Node} node
 * @return {Node} the node passed in
 */
export default function Node (node) {
  node.isBlock = isBlock(node)

  var isWithinCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode
  node.isCode = isWithinCode || isCodeBlock(node);

  node.isBlank = isBlank(node)
  // Reads node.isBlock, so it has to come after the assignment above.
  node.flankingWhitespace = flankingWhitespace(node)
  return node
}
// A node is blank when it is not an <a>, <th> or <td>, its text content is only
// whitespace, and it neither is nor has a void element.
function isBlank (node) {
  if (['A', 'TH', 'TD'].indexOf(node.nodeName) !== -1) return false
  if (!/^\s*$/i.test(node.textContent)) return false
  if (isVoid(node)) return false
  return !hasVoid(node)
}
// Computes the single spaces to put before and after the conversion of a non-block
// node: one is added on a side where the node's text starts/ends with whitespace
// and the neighbouring sibling does not already provide a space.
function flankingWhitespace (node) {
  var edges = { leading: '', trailing: '' }
  if (node.isBlock) return edges

  var startsWithWhitespace = /^[ \r\n\t]/.test(node.textContent)
  var endsWithWhitespace = /[ \r\n\t]$/.test(node.textContent)

  if (startsWithWhitespace && !isFlankedByWhitespace('left', node)) edges.leading = ' '
  if (endsWithWhitespace && !isFlankedByWhitespace('right', node)) edges.trailing = ' '

  return edges
}
// Tells whether the sibling on the given side ('left', anything else meaning right)
// already puts a space next to the node. Text siblings and non-block element
// siblings are inspected; the result is undefined in every other case.
function isFlankedByWhitespace (side, node) {
  var onLeft = side === 'left'
  var sibling = onLeft ? node.previousSibling : node.nextSibling
  var regExp = onLeft ? / $/ : /^ /

  if (!sibling) return undefined
  if (sibling.nodeType === 3) return regExp.test(sibling.nodeValue)
  if (sibling.nodeType === 1 && !isBlock(sibling)) return regExp.test(sibling.textContent)
  return undefined
}

View File

@ -0,0 +1,32 @@
import collapseWhitespace from './collapse-whitespace'
import HTMLParser from './html-parser'
import { isBlock, isVoid } from './utilities'
/**
 * Produces the root node to convert, with its whitespace collapsed.
 *
 * @param {string|Node} input - an HTML string, which is parsed, or a node, which is
 *   deep-cloned so that the caller's node is left untouched
 * @return {Node}
 */
export default function RootNode (input) {
  var root
  if (typeof input !== 'string') {
    root = input.cloneNode(true)
  } else {
    var parsedDocument = htmlParser().parseFromString(
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      '<x-turndown id="turndown-root">' + input + '</x-turndown>',
      'text/html'
    )
    root = parsedDocument.getElementById('turndown-root')
  }

  collapseWhitespace({ element: root, isBlock: isBlock, isVoid: isVoid })
  return root
}
// Holds the HTMLParser instance once one has been created.
var _htmlParser

/**
 * Returns the shared HTMLParser instance, creating it on first use.
 *
 * @returns {Object} The HTMLParser instance
 */
function htmlParser () {
  if (!_htmlParser) _htmlParser = new HTMLParser()
  return _htmlParser
}

View File

@ -0,0 +1,80 @@
/**
 * Manages a collection of rules used to convert HTML to Markdown.
 *
 * Reads `rules`, `blankReplacement`, `keepReplacement` and
 * `defaultReplacement` from the options object and keeps three rule lists:
 * the regular rules (`array`), the "keep" rules and the "remove" rules.
 *
 * @param {Object} options The converter options
 */
export default function Rules (options) {
  this.options = options
  this._keep = []
  this._remove = []

  this.blankRule = { replacement: options.blankReplacement }
  this.keepReplacement = options.keepReplacement
  this.defaultRule = { replacement: options.defaultReplacement }

  // Flatten the keyed rules object into an ordered list.
  var ordered = []
  for (var name in options.rules) ordered.push(options.rules[name])
  this.array = ordered
}

Rules.prototype = {
  /**
   * Puts a rule at the front of the regular rules; the key is not stored.
   */
  add: function (key, rule) {
    this.array.unshift(rule)
  },

  /**
   * Registers a filter whose matching nodes use the keep replacement.
   */
  keep: function (filter) {
    var keepRule = { filter: filter, replacement: this.keepReplacement }
    this._keep.unshift(keepRule)
  },

  /**
   * Registers a filter whose matching nodes are replaced by an empty string.
   */
  remove: function (filter) {
    var removeRule = {
      filter: filter,
      replacement: function () {
        return ''
      }
    }
    this._remove.unshift(removeRule)
  },

  /**
   * Picks the rule for a node: the blank rule for blank nodes, otherwise the
   * first match among regular, keep and remove rules (in that order),
   * otherwise the default rule.
   */
  forNode: function (node) {
    if (node.isBlank) return this.blankRule

    var groups = [this.array, this._keep, this._remove]
    for (var g = 0; g < groups.length; g++) {
      var match = findRule(groups[g], node, this.options)
      if (match) return match
    }
    return this.defaultRule
  },

  /**
   * Calls fn(rule, index) for every regular rule, in order.
   */
  forEach: function (fn) {
    var index = 0
    while (index < this.array.length) {
      fn(this.array[index], index)
      index++
    }
  }
}
/**
 * Returns the first rule in the list whose filter accepts the node.
 *
 * @param {Array} rules Rules to try, in priority order
 * @param {Node} node The node being converted
 * @param {Object} options The converter options, forwarded to the filter
 * @returns {Object|undefined} The matching rule, or undefined when none match
 */
function findRule (rules, node, options) {
  var index = 0
  while (index < rules.length) {
    var candidate = rules[index]
    if (filterValue(candidate, node, options)) return candidate
    index += 1
  }
  return undefined
}
/**
 * Evaluates a rule's filter against a node.
 *
 * A string filter is compared with the lower-cased node name, an array filter
 * is searched for the lower-cased node name, and a function filter is called
 * with the rule as `this` and (node, options) as arguments.
 *
 * @param {Object} rule The rule whose `filter` is evaluated
 * @param {Node} node The node being converted
 * @param {Object} options The converter options
 * @returns {Boolean|undefined} true on a match, undefined otherwise
 * @throws {TypeError} When the filter is not a string, array or function
 */
function filterValue (rule, node, options) {
  var filter = rule.filter

  if (typeof filter === 'string') {
    return filter === node.nodeName.toLowerCase() ? true : undefined
  }
  if (Array.isArray(filter)) {
    return filter.indexOf(node.nodeName.toLowerCase()) > -1 ? true : undefined
  }
  if (typeof filter === 'function') {
    return filter.call(rule, node, options) ? true : undefined
  }
  throw new TypeError('`filter` needs to be a string, array, or function')
}

View File

@ -0,0 +1,291 @@
import COMMONMARK_RULES from './commonmark-rules'
import Rules from './rules'
import { extend, isCodeBlock } from './utilities'
import RootNode from './root-node'
import Node from './node'
// Borrowed from Array.prototype so it can be applied with `reduce.call(...)`
// to array-like collections such as a node's childNodes.
var reduce = Array.prototype.reduce
// Match the (possibly empty) run of newlines at the very start of a string
// and at the very end of a string, respectively.
var leadingNewLinesRegExp = /^\n*/
var trailingNewLinesRegExp = /\n*$/
/**
 * Converts HTML (a string or a DOM node) to Markdown.
 *
 * The supplied options are layered over the built-in defaults below, and a
 * Rules collection is created from the merged result. Can be called with or
 * without `new`.
 *
 * @param {Object} [options] Option overrides
 */
export default function TurndownService (options) {
  // Called as a plain function: re-enter through `new`.
  if (!(this instanceof TurndownService)) return new TurndownService(options)

  // Surrounds block-level output with blank lines; inline output is returned
  // untouched.
  function spaced (node, text) {
    return node.isBlock ? '\n\n' + text + '\n\n' : text
  }

  var defaults = {
    rules: COMMONMARK_RULES,
    headingStyle: 'setext',
    hr: '* * *',
    bulletListMarker: '*',
    codeBlockStyle: 'indented',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
    anchorNames: [],
    br: ' ',
    // Used for blank nodes.
    blankReplacement: function (content, node) {
      return node.isBlock ? '\n\n' : ''
    },
    // Used for nodes that are kept as raw HTML.
    keepReplacement: function (content, node) {
      return spaced(node, node.outerHTML)
    },
    // Used when no rule matches a node.
    defaultReplacement: function (content, node) {
      return spaced(node, content)
    }
  }

  this.options = extend({}, defaults, options)
  this.rules = new Rules(this.options)
}

TurndownService.prototype = {
  /**
   * The entry point for converting a string or DOM node to Markdown
   * @public
   * @param {String|HTMLElement} input The string or DOM node to convert
   * @returns A Markdown representation of the input
   * @type String
   */
  turndown: function (input) {
    if (!canConvert(input)) {
      throw new TypeError(
        input + ' is not a string, or an element/document/fragment node.'
      )
    }

    // Nothing to parse for an empty string.
    if (input === '') return ''

    var markdown = process.call(this, new RootNode(input))
    return postProcess.call(this, markdown)
  },

  /**
   * Add one or more plugins
   * @public
   * @param {Function|Array} plugin The plugin or array of plugins to add
   * @returns The Turndown instance for chaining
   * @type Object
   */
  use: function (plugin) {
    if (Array.isArray(plugin)) {
      for (var i = 0; i < plugin.length; i++) this.use(plugin[i])
      return this
    }
    if (typeof plugin !== 'function') {
      throw new TypeError('plugin must be a Function or an Array of Functions')
    }
    // A plugin is a function that receives the service instance.
    plugin(this)
    return this
  },

  /**
   * Adds a rule
   * @public
   * @param {String} key The unique key of the rule
   * @param {Object} rule The rule
   * @returns The Turndown instance for chaining
   * @type Object
   */
  addRule: function (key, rule) {
    this.rules.add(key, rule)
    return this
  },

  /**
   * Keep a node (as HTML) that matches the filter
   * @public
   * @param {String|Array|Function} filter The filter selecting nodes to keep
   * @returns The Turndown instance for chaining
   * @type Object
   */
  keep: function (filter) {
    this.rules.keep(filter)
    return this
  },

  /**
   * Remove a node that matches the filter
   * @public
   * @param {String|Array|Function} filter The filter selecting nodes to remove
   * @returns The Turndown instance for chaining
   * @type Object
   */
  remove: function (filter) {
    this.rules.remove(filter)
    return this
  },

  /**
   * Escapes Markdown syntax
   * @public
   * @param {String} string The string to escape
   * @returns A string with Markdown syntax escaped
   * @type String
   */
  escape: function (string) {
    var escaped = string

    // Escape backslash escapes!
    escaped = escaped.replace(/\\(\S)/g, '\\\\$1')

    // Escape headings
    escaped = escaped.replace(/^(#{1,6} )/gm, '\\$1')

    // Escape hr
    escaped = escaped.replace(/^([-*_] *){3,}$/gm, function (match, character) {
      return match.split(character).join('\\' + character)
    })

    // Escape ol bullet points
    escaped = escaped.replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')

    // Escape ul bullet points
    escaped = escaped.replace(/^([^\\\w]*)[*+-] /gm, function (match) {
      return match.replace(/([*+-])/g, '\\$1')
    })

    // Escape blockquote indents
    escaped = escaped.replace(/^(\W* {0,3})> /gm, '$1\\> ')

    // Escape em/strong *
    escaped = escaped.replace(/\*+(?![*\s\W]).+?\*+/g, function (match) {
      return match.replace(/\*/g, '\\*')
    })

    // Escape em/strong _
    escaped = escaped.replace(/_+(?![_\s\W]).+?_+/g, function (match) {
      return match.replace(/_/g, '\\_')
    })

    // Escape inline-code backticks
    escaped = escaped.replace(/`+(?![`\s\W]).+?`+/g, function (match) {
      return match.replace(/`/g, '\\`')
    })

    // Escape link brackets
    escaped = escaped.replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape

    return escaped
  }
}
/**
 * Reduces a DOM node down to its Markdown string equivalent
 * @private
 * @param {HTMLElement} parentNode The node whose child nodes are converted
 * @param {String|Boolean} escapeContent When exactly `false`, text nodes are
 * emitted without escaping; defaults to 'auto'
 * @returns A Markdown representation of the node
 * @type String
 */
function process (parentNode, escapeContent = 'auto') {
  var service = this

  // Text inside code, or under a rule that disabled escaping, is emitted
  // verbatim. Everything else has its Markdown syntax escaped.
  function renderText (textNode) {
    if (textNode.isCode || escapeContent === false) return textNode.nodeValue

    var escaped = service.escape(textNode.nodeValue)
    // Escape < and > so that, for example, this kind of HTML text: "This is a tag: &lt;p&gt;" is still rendered as "This is a tag: &lt;p&gt;"
    // and not "This is a tag: <p>". If the latter, it means the HTML will be rendered if the viewer supports HTML (which, in Joplin, it does).
    return escaped.replace(/<(.+?)>/g, '&lt;$1&gt;')
  }

  return reduce.call(parentNode.childNodes, function (markdown, child) {
    var wrapped = new Node(child)
    var piece = ''

    if (wrapped.nodeType === 3) {
      piece = renderText(wrapped)
    } else if (wrapped.nodeType === 1) {
      piece = replacementForNode.call(service, wrapped)
    }
    // Any other node type contributes an empty string.

    return join(markdown, piece)
  }, '')
}
/**
 * Appends strings as each rule requires and trims the output
 * @private
 * @param {String} output The conversion output
 * @returns A trimmed version of the output
 * @type String
 */
function postProcess (output) {
  var service = this
  var result = output

  this.rules.forEach(function (rule) {
    // Only rules that expose an `append` function contribute extra text.
    if (typeof rule.append !== 'function') return
    result = join(result, rule.append(service.options))
  })

  // Leading tabs/CRs/newlines and all trailing whitespace are dropped;
  // leading spaces are left in place.
  return result.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
 * Converts an element node to its Markdown equivalent
 * @private
 * @param {HTMLElement} node The node to convert
 * @returns A Markdown representation of the node
 * @type String
 */
function replacementForNode (node) {
  var rule = this.rules.forNode(node)

  // A rule may override how its text content is escaped; otherwise 'auto'.
  var escapeMode = rule.escapeContent ? rule.escapeContent() : 'auto'
  var inner = process.call(this, node, escapeMode)

  // When whitespace is re-added outside the node, strip it from the inside.
  var edges = node.flankingWhitespace
  if (edges.leading || edges.trailing) inner = inner.trim()

  var converted = rule.replacement(inner, node, this.options)
  return edges.leading + converted + edges.trailing
}
/**
 * Determines the new lines between the current output and the replacement
 * @private
 * @param {String} output The current conversion output
 * @param {String} replacement The string to append to the output
 * @returns The newlines (at most two) that separate the output and the
 * replacement
 * @type String
 */
function separatingNewlines (output, replacement) {
  var trailing = output.match(trailingNewLinesRegExp)[0]
  var leading = replacement.match(leadingNewLinesRegExp)[0]

  // Both runs consist solely of newlines, so the longer one is the larger.
  var longest = leading.length > trailing.length ? leading : trailing

  // Never separate by more than one blank line.
  return longest.length < 2 ? longest : '\n\n'
}
/**
 * Concatenates two Markdown fragments, replacing the newlines where they meet
 * with the separator computed by separatingNewlines().
 *
 * @param {String} string1 The text accumulated so far
 * @param {String} string2 The text to append
 * @returns {String} The joined text
 */
function join (string1, string2) {
  // Strip the newlines on the touching edges; the separator is computed from
  // the untouched inputs.
  var head = string1.replace(trailingNewLinesRegExp, '')
  var tail = string2.replace(leadingNewLinesRegExp, '')
  return head + separatingNewlines(string1, string2) + tail
}
/**
 * Determines whether an input can be converted
 * @private
 * @param {String|HTMLElement} input The value handed to the converter
 * @returns A truthy value for strings and for nodes whose nodeType is 1
 * (element), 9 (document) or 11 (document fragment); a falsy value otherwise
 * @type Boolean|Number|undefined
 */
function canConvert (input) {
  if (input == null) return false
  if (typeof input === 'string') return true
  // A falsy nodeType is returned as-is, hence the non-boolean return type.
  return input.nodeType && [1, 9, 11].indexOf(input.nodeType) !== -1
}

View File

@ -0,0 +1,80 @@
const css = require('css');
/**
 * Copies the own enumerable properties of every source object onto the
 * destination, left to right, so later sources win.
 *
 * @param {Object} destination The object that receives the properties
 * @param {...Object} sources Objects to copy from; null/undefined are skipped
 * @returns {Object} The destination object
 */
export function extend (destination) {
  // Borrow hasOwnProperty from Object.prototype: a source created with
  // Object.create(null), or one that defines its own `hasOwnProperty` key,
  // cannot be asked directly.
  var hasOwn = Object.prototype.hasOwnProperty
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i]
    for (var key in source) {
      if (hasOwn.call(source, key)) destination[key] = source[key]
    }
  }
  return destination
}
/**
 * Builds a string made of `count` copies of `character`.
 *
 * @param {String} character The text to repeat
 * @param {Number} count How many copies to produce
 * @returns {String} The repeated text
 */
export function repeat (character, count) {
  // Joining count + 1 empty slots yields exactly `count` separators.
  var slots = new Array(count + 1)
  return slots.join(character)
}
// Lower-case tag names that are treated as block-level elements.
export var blockElements = (
  'address article aside audio blockquote body canvas ' +
  'center dd dir div dl dt fieldset figcaption ' +
  'figure footer form frameset h1 h2 h3 h4 h5 h6 ' +
  'header hgroup hr html isindex li main menu nav ' +
  'noframes noscript ol output p pre section table ' +
  'tbody td tfoot th thead tr ul'
).split(' ')

/**
 * Tells whether a node is a block-level element, based on its tag name.
 *
 * @param {Node} node The node to test
 * @returns {Boolean} true when the lower-cased node name is in blockElements
 */
export function isBlock (node) {
  var tagName = node.nodeName.toLowerCase()
  return blockElements.indexOf(tagName) > -1
}
// Lower-case tag names of void elements.
export var voidElements = (
  'area base br col command embed hr img input ' +
  'keygen link meta param source track wbr'
).split(' ')

/**
 * Tells whether a node is a void element, based on its tag name.
 *
 * @param {Node} node The node to test
 * @returns {Boolean} true when the lower-cased node name is in voidElements
 */
export function isVoid (node) {
  var tagName = node.nodeName.toLowerCase()
  return voidElements.indexOf(tagName) > -1
}

// Comma-separated selector matching any void element; computed once at load.
var voidSelector = voidElements.join(',')

/**
 * Looks for a void element among a node's descendants.
 *
 * @param {Node} node The node to search
 * @returns The first matching descendant as returned by querySelector, or a
 * falsy value when the node has no querySelector or nothing matches
 */
export function hasVoid (node) {
  // Nodes without querySelector yield that falsy value itself.
  if (!node.querySelector) return node.querySelector
  return node.querySelector(voidSelector)
}
// Recognises code laid out as a PRE directly inside a table cell that carries
// the "code" class (see https://github.com/laurent22/joplin/issues/573):
//
// <td class="code">
//   <pre class="python">
//     <span style="color: #ff7700;font-weight:bold;">def</span> ma_fonction
//   </pre>
// </td>
//
// Returns false when the node has no parent, a falsy value when the parent has
// no classList, and otherwise whether all three conditions hold.
export function isCodeBlockSpecialCase1(node) {
  const cell = node.parentNode;
  if (!cell) return false;

  const classes = cell.classList;
  return classes && classes.contains('code') && cell.nodeName === 'TD' && node.nodeName === 'PRE';
}
// To handle PRE tags that have a monospace font family. In that case
// we assume it is a code block.
//
// The inline `style` attribute is parsed as the body of a `pre { ... }` rule
// and the node qualifies when one of the comma-separated font-family values
// is exactly "monospace" (case-insensitive). Always returns a boolean.
export function isCodeBlockSpecialCase2(node) {
  if (node.nodeName !== 'PRE') return false;

  const style = node.getAttribute('style');
  if (!style) return false;

  let rules;
  try {
    rules = css.parse('pre {' + style + '}').stylesheet.rules;
  } catch (error) {
    // The style attribute comes from arbitrary web pages and may not be valid
    // CSS. This check is only a heuristic, so an unparsable style means "not
    // recognised as code" rather than a failed conversion.
    return false;
  }
  if (!rules.length) return false;

  // The declaration list can also hold entries that are not declarations
  // (e.g. comments), which have no `property` - skip those.
  const declarations = rules[0].declarations || [];
  const fontFamily = declarations.find(d => typeof d.property === 'string' && d.property.toLowerCase() === 'font-family');
  if (!fontFamily || !fontFamily.value) return false;

  const families = fontFamily.value.split(',').map(e => e.trim().toLowerCase());
  return families.indexOf('monospace') >= 0;
}
// Tells whether a node should be converted as a code block: either one of the
// two special cases applies, or the node is a PRE whose first child is a CODE
// element. Returns true/false, or the falsy firstChild value when a PRE has no
// children.
export function isCodeBlock(node) {
  if (isCodeBlockSpecialCase1(node)) return true;
  if (isCodeBlockSpecialCase2(node)) return true;

  if (node.nodeName !== 'PRE') return false;
  const first = node.firstChild;
  return first && first.nodeName === 'CODE';
}

View File

@ -0,0 +1,853 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>turndown test runner</title>
<link rel="stylesheet" href="../node_modules/turndown-attendant/dist/styles.css">
</head>
<body>
<!-- TEST CASES -->
<div class="case" data-name="p">
<div class="input"><p>Lorem ipsum</p></div>
<pre class="expected">Lorem ipsum</pre>
</div>
<div class="case" data-name="multiple ps">
<div class="input">
<p>Lorem</p>
<p>ipsum</p>
<p>sit</p>
</div>
<pre class="expected">Lorem
ipsum
sit</pre>
</div>
<div class="case" data-name="em">
<div class="input"><em>em element</em></div>
<pre class="expected">_em element_</pre>
</div>
<div class="case" data-name="i">
<div class="input"><i>i element</i></div>
<pre class="expected">_i element_</pre>
</div>
<div class="case" data-name="strong">
<div class="input"><strong>strong element</strong></div>
<pre class="expected">**strong element**</pre>
</div>
<div class="case" data-name="b">
<div class="input"><b>b element</b></div>
<pre class="expected">**b element**</pre>
</div>
<div class="case" data-name="code">
<div class="input"><code>code element</code></div>
<pre class="expected">`code element`</pre>
</div>
<div class="case" data-name="code containing a backtick">
<div class="input"><code>There is a literal backtick (`) here</code></div>
<pre class="expected">``There is a literal backtick (`) here``</pre>
</div>
<div class="case" data-name="code containing three or more backticks">
<div class="input"><code>here are three ``` here are four ```` that's it</code></div>
<pre class="expected">`here are three ``` here are four ```` that's it`</pre>
</div>
<div class="case" data-name="code containing one or more backticks">
<div class="input"><code>here are three ``` here are four ```` here is one ` that's it</code></div>
<pre class="expected">``here are three ``` here are four ```` here is one ` that's it``</pre>
</div>
<div class="case" data-name="code starting with a backtick">
<div class="input"><code>`starting with a backtick</code></div>
<pre class="expected">`` `starting with a backtick``</pre>
</div>
<div class="case" data-name="code containing markdown syntax">
<div class="input"><code>_emphasis_</code></div>
<pre class="expected">`_emphasis_`</pre>
</div>
<div class="case" data-name="code containing markdown syntax in a span">
<div class="input"><code><span>_emphasis_</span></code></div>
<pre class="expected">`_emphasis_`</pre>
</div>
<div class="case" data-name="h1">
<div class="input"><h1>Level One Heading</h1></div>
<pre class="expected">Level One Heading
=================</pre>
</div>
<div class="case" data-name="h1 as atx" data-options='{"headingStyle":"atx"}'>
<div class="input"><h1>Level One Heading with ATX</h1></div>
<pre class="expected"># Level One Heading with ATX</pre>
</div>
<div class="case" data-name="h2">
<div class="input"><h2>Level Two Heading</h2></div>
<pre class="expected">Level Two Heading
-----------------</pre>
</div>
<div class="case" data-name="h2 as atx" data-options='{"headingStyle":"atx"}'>
<div class="input"><h2>Level Two Heading with ATX</h2></div>
<pre class="expected">## Level Two Heading with ATX</pre>
</div>
<div class="case" data-name="h3">
<div class="input"><h3>Level Three Heading</h3></div>
<pre class="expected">### Level Three Heading</pre>
</div>
<div class="case" data-name="heading with child">
<div class="input"><h4>Level Four Heading with <code>child</code></h4></div>
<pre class="expected">#### Level Four Heading with `child`</pre>
</div>
<div class="case" data-name="invalid heading">
<div class="input"><h7>Level Seven Heading?</h7></div>
<pre class="expected">Level Seven Heading?</pre>
</div>
<div class="case" data-name="hr">
<div class="input"><hr></div>
<pre class="expected">* * *</pre>
</div>
<div class="case" data-name="hr with closing tag">
<div class="input"><hr></hr></div>
<pre class="expected">* * *</pre>
</div>
<div class="case" data-name="hr with option" data-options='{"hr": "- - -"}'>
<div class="input"><hr></div>
<pre class="expected">- - -</pre>
</div>
<div class="case" data-name="br">
<div class="input">More<br>after the break</div>
<pre class="expected">More
after the break</pre>
</div>
<div class="case" data-name="br with visible line-ending" data-options='{"br": "\\"}'>
<div class="input">More<br>after the break</div>
<pre class="expected">More\
after the break</pre>
</div>
<div class="case" data-name="img with no alt">
<div class="input"><img src="http://example.com/logo.png" /></div>
<pre class="expected">![](http://example.com/logo.png)</pre>
</div>
<div class="case" data-name="img with relative src">
<div class="input"><img src="logo.png"></div>
<pre class="expected">![](logo.png)</pre>
</div>
<div class="case" data-name="img with alt">
<div class="input"><img src="logo.png" alt="img with alt"></div>
<pre class="expected">![img with alt](logo.png)</pre>
</div>
<div class="case" data-name="img with no src">
<div class="input"><img></div>
<pre class="expected"></pre>
</div>
<div class="case" data-name="a">
<div class="input"><a href="http://example.com">An anchor</a></div>
<pre class="expected">[An anchor](http://example.com)</pre>
</div>
<div class="case" data-name="a with title">
<div class="input"><a href="http://example.com" title="Title for link">An anchor</a></div>
<pre class="expected">[An anchor](http://example.com "Title for link")</pre>
</div>
<div class="case" data-name="a without a src">
<div class="input"><a id="about-anchor">Anchor without a title</a></div>
<pre class="expected">Anchor without a title</pre>
</div>
<div class="case" data-name="a with a child">
<div class="input"><a href="http://example.com/code">Some <code>code</code></a></div>
<pre class="expected">[Some `code`](http://example.com/code)</pre>
</div>
<div class="case" data-name="a reference" data-options='{"linkStyle": "referenced"}'>
<div class="input"><a href="http://example.com">Reference link</a></div>
<pre class="expected">[Reference link][1]
[1]: http://example.com</pre>
</div>
<div class="case" data-name="a reference with collapsed style" data-options='{"linkStyle": "referenced", "linkReferenceStyle": "collapsed"}'>
<div class="input"><a href="http://example.com">Reference link with collapsed style</a></div>
<pre class="expected">[Reference link with collapsed style][]
[Reference link with collapsed style]: http://example.com</pre>
</div>
<div class="case" data-name="a reference with shortcut style" data-options='{"linkStyle": "referenced", "linkReferenceStyle": "shortcut"}'>
<div class="input"><a href="http://example.com">Reference link with shortcut style</a></div>
<pre class="expected">[Reference link with shortcut style]
[Reference link with shortcut style]: http://example.com</pre>
</div>
<div class="case" data-name="pre/code block">
<div class="input"><pre><code>def code_block
# 42 &lt; 9001
"Hello world!"
end</code></pre></div>
<pre class="expected"> def code_block
# 42 < 9001
"Hello world!"
end</pre>
</div>
<div class="case" data-name="multiple pre/code blocks">
<div class="input"><pre><code>def first_code_block
# 42 &lt; 9001
"Hello world!"
end</code></pre>
<p>next:</p>
<pre><code>def second_code_block
# 42 &lt; 9001
"Hello world!"
end</code></pre></div>
<pre class="expected"> def first_code_block
# 42 < 9001
"Hello world!"
end
next:
def second_code_block
# 42 < 9001
"Hello world!"
end</pre>
</div>
<div class="case" data-name="pre/code block with multiple new lines">
<div class="input"><div><pre><code>Multiple new lines
should not be
removed</code></pre></div></div>
<pre class="expected"> Multiple new lines
should not be
removed</pre>
</div>
<div class="case" data-name="fenced pre/code block" data-options='{"codeBlockStyle": "fenced"}'>
<div class="input">
<pre><code>def a_fenced_code block; end</code></pre>
</div>
<pre class="expected">```
def a_fenced_code block; end
```</pre>
</div>
<div class="case" data-name="pre/code block fenced with ~" data-options='{"codeBlockStyle": "fenced", "fence": "~~~"}'>
<div class="input">
<pre><code>def a_fenced_code block; end</code></pre>
</div>
<pre class="expected">~~~
def a_fenced_code block; end
~~~</pre>
</div>
<div class="case" data-name="fenced pre/code block with language" data-options='{"codeBlockStyle": "fenced"}'>
<div class="input">
<pre><code class="language-ruby">def a_fenced_code block; end</code></pre>
</div>
<pre class="expected">```ruby
def a_fenced_code block; end
```</pre>
</div>
<div class="case" data-name="empty pre does not throw error">
<div class="input">
<pre></pre>
</div>
<pre class="expected"></pre>
</div>
<div class="case" data-name="ol">
<div class="input">
<ol>
<li>Ordered list item 1</li>
<li>Ordered list item 2</li>
<li>Ordered list item 3</li>
</ol>
</div>
<pre class="expected">1. Ordered list item 1
2. Ordered list item 2
3. Ordered list item 3</pre>
</div>
<div class="case" data-name="ol with start">
<div class="input">
<ol start="42">
<li>Ordered list item 42</li>
<li>Ordered list item 43</li>
<li>Ordered list item 44</li>
</ol>
</div>
<pre class="expected">42. Ordered list item 42
43. Ordered list item 43
44. Ordered list item 44</pre>
</div>
<div class="case" data-name="list spacing">
<div class="input">
<p>A paragraph.</p>
<ol>
<li>Ordered list item 1</li>
<li>Ordered list item 2</li>
<li>Ordered list item 3</li>
</ol>
<p>Another paragraph.</p>
<ul>
<li>Unordered list item 1</li>
<li>Unordered list item 2</li>
<li>Unordered list item 3</li>
</ul>
</div>
<pre class="expected">A paragraph.
1. Ordered list item 1
2. Ordered list item 2
3. Ordered list item 3
Another paragraph.
* Unordered list item 1
* Unordered list item 2
* Unordered list item 3</pre>
</div>
<div class="case" data-name="ul">
<div class="input">
<ul>
<li>Unordered list item 1</li>
<li>Unordered list item 2</li>
<li>Unordered list item 3</li>
</ul>
</div>
<pre class="expected">* Unordered list item 1
* Unordered list item 2
* Unordered list item 3</pre>
</div>
<div class="case" data-name="ul with custom bullet" data-options='{"bulletListMarker": "-"}'>
<div class="input">
<ul>
<li>Unordered list item 1</li>
<li>Unordered list item 2</li>
<li>Unordered list item 3</li>
</ul>
</div>
<pre class="expected">- Unordered list item 1
- Unordered list item 2
- Unordered list item 3</pre>
</div>
<div class="case" data-name="ul with paragraph">
<div class="input">
<ul>
<li><p>List item with paragraph</p></li>
<li>List item without paragraph</li>
</ul>
</div>
<pre class="expected">* List item with paragraph
* List item without paragraph</pre>
</div>
<div class="case" data-name="ol with paragraphs">
<div class="input">
<ol>
<li>
<p>This is a paragraph in a list item.</p>
<p>This is a paragraph in the same list item as above.</p>
</li>
<li>
<p>A paragraph in a second list item.</p>
</li>
</ol>
</div>
<pre class="expected">1. This is a paragraph in a list item.
This is a paragraph in the same list item as above.
2. A paragraph in a second list item.</pre>
</div>
<div class="case" data-name="nested uls">
<div class="input">
<ul>
<li>This is a list item at root level</li>
<li>This is another item at root level</li>
<li>
<ul>
<li>This is a nested list item</li>
<li>This is another nested list item</li>
<li>
<ul>
<li>This is a deeply nested list item</li>
<li>This is another deeply nested list item</li>
<li>This is a third deeply nested list item</li>
</ul>
</li>
</ul>
</li>
<li>This is a third item at root level</li>
</ul>
</div>
<pre class="expected">* This is a list item at root level
* This is another item at root level
* * This is a nested list item
* This is another nested list item
* * This is a deeply nested list item
* This is another deeply nested list item
* This is a third deeply nested list item
* This is a third item at root level</pre>
</div>
<div class="case" data-name="nested ols and uls">
<div class="input">
<ul>
<li>This is a list item at root level</li>
<li>This is another item at root level</li>
<li>
<ol>
<li>This is a nested list item</li>
<li>This is another nested list item</li>
<li>
<ul>
<li>This is a deeply nested list item</li>
<li>This is another deeply nested list item</li>
<li>This is a third deeply nested list item</li>
</ul>
</li>
</ol>
</li>
<li>This is a third item at root level</li>
</ul>
</div>
<pre class="expected">* This is a list item at root level
* This is another item at root level
* 1. This is a nested list item
2. This is another nested list item
3. * This is a deeply nested list item
* This is another deeply nested list item
* This is a third deeply nested list item
* This is a third item at root level</pre>
</div>
<div class="case" data-name="ul with blockquote">
<div class="input">
<ul>
<li>
<p>A list item with a blockquote:</p>
<blockquote>
<p>This is a blockquote inside a list item.</p>
</blockquote>
</li>
</ul>
</div>
<pre class="expected">* A list item with a blockquote:
> This is a blockquote inside a list item.</pre>
</div>
<div class="case" data-name="blockquote">
<div class="input">
<blockquote>
<p>This is a paragraph within a blockquote.</p>
<p>This is another paragraph within a blockquote.</p>
</blockquote>
</div>
<pre class="expected">> This is a paragraph within a blockquote.
>
> This is another paragraph within a blockquote.</pre>
</div>
<div class="case" data-name="nested blockquotes">
<div class="input">
<blockquote>
<p>This is the first level of quoting.</p>
<blockquote>
<p>This is a paragraph in a nested blockquote.</p>
</blockquote>
<p>Back to the first level.</p>
</blockquote>
</div>
<pre class="expected">> This is the first level of quoting.
>
> > This is a paragraph in a nested blockquote.
>
> Back to the first level.</pre>
</div>
<div class="case" data-name="html in blockquote">
<div class="input">
<blockquote>
<h2>This is a header.</h2>
<ol>
<li>This is the first list item.</li>
<li>This is the second list item.</li>
</ol>
<p>A code block:</p>
<pre><code>return 1 &lt; 2 ? shell_exec('echo $input | $markdown_script') : 0;</code></pre>
</blockquote>
</div>
<pre class="expected">> This is a header.
> -----------------
>
> 1. This is the first list item.
> 2. This is the second list item.
>
> A code block:
>
> return 1 < 2 ? shell_exec('echo $input | $markdown_script') : 0;</pre>
</div>
<div class="case" data-name="multiple divs">
<div class="input">
<div>A div</div>
<div>Another div</div>
</div>
<pre class="expected">A div
Another div</pre>
</div>
<div class="case" data-name="multiple divs">
<div class="input">
<div>A div</div>
<div>Another div</div>
</div>
<pre class="expected">A div
Another div</pre>
</div>
<div class="case" data-name="comment">
<div class="input"><!-- comment --></div>
<pre class="expected"></pre>
</div>
<div class="case" data-name="pre/code with comment">
<div class="input">
<pre ><code>Hello<!-- comment --> world</code></pre>
</div>
<pre class="expected"> Hello world</pre>
</div>
<div class="case" data-name="leading whitespace in heading">
<div class="input"><h3>
h3 with leading whitespace</h3></div>
<pre class="expected">### h3 with leading whitespace</pre>
</div>
<div class="case" data-name="trailing whitespace in li">
<div class="input">
<ol>
<li>Chapter One
<ol>
<li>Section One</li>
<li>Section Two with trailing whitespace </li>
<li>Section Three with trailing whitespace </li>
</ol>
</li>
<li>Chapter Two</li>
<li>Chapter Three with trailing whitespace </li>
</ol>
</div>
<pre class="expected">1. Chapter One
1. Section One
2. Section Two with trailing whitespace
3. Section Three with trailing whitespace
2. Chapter Two
3. Chapter Three with trailing whitespace</pre>
</div>
<div class="case" data-name="multilined and bizarre formatting">
<div class="input">
<ul>
<li>
Indented li with leading/trailing newlines
</li>
<li>
<strong>Strong with trailing space inside li with leading/trailing whitespace </strong> </li>
<li>li without whitespace</li>
<li> Leading space, text, lots of whitespace …
text
</li>
</ol>
</div>
<pre class="expected">* Indented li with leading/trailing newlines
* **Strong with trailing space inside li with leading/trailing whitespace**
* li without whitespace
* Leading space, text, lots of whitespace … text</pre>
</div>
<div class="case" data-name="whitespace between inline elements">
<div class="input">
<p>I <a href="http://example.com/need">need</a> <a href="http://www.example.com/more">more</a> spaces!</p>
</div>
<pre class="expected">I [need](http://example.com/need) [more](http://www.example.com/more) spaces!</pre>
</div>
<div class="case" data-name="whitespace in inline elements">
<div class="input">Text with no space after the period.<em> Text in em with leading/trailing spaces </em><strong>text in strong with trailing space </strong></div>
<pre class="expected">Text with no space after the period. _Text in em with leading/trailing spaces_ **text in strong with trailing space**</pre>
</div>
<div class="case" data-name="whitespace in nested inline elements">
<div class="input">Text at root <strong><a href="http://www.example.com">link text with trailing space in strong </a></strong>more text at root</div>
<pre class="expected">Text at root **[link text with trailing space in strong](http://www.example.com)** more text at root</pre>
</div>
<div class="case" data-name="blank inline elements">
<div class="input">
Text before blank em … <em></em> text after blank em
</div>
<pre class="expected">Text before blank em … text after blank em</pre>
</div>
<div class="case" data-name="blank block elements">
<div class="input">
Text before blank div … <div></div> text after blank div
</div>
<pre class="expected">Text before blank div …
text after blank div</pre>
</div>
<div class="case" data-name="blank inline element with br">
<div class="input"><strong><br></strong></div>
<pre class="expected"></pre>
</div>
<div class="case" data-name="whitespace between blocks">
<div class="input"><div><div>Content in a nested div</div></div>
<div>Content in another div</div></div>
<pre class="expected">Content in a nested div
Content in another div</pre>
</div>
<div class="case" data-name="escaping backslashes">
<div class="input">*\*</div>
<pre class="expected">*\\*</pre>
</div>
<div class="case" data-name="escaping headings with #">
<div class="input">### This is not a heading</div>
<pre class="expected">\### This is not a heading</pre>
</div>
<div class="case" data-name="escaping em markdown with *">
<div class="input">To add emphasis, surround text with *. For example: *this is emphasis*</div>
<pre class="expected">To add emphasis, surround text with *. For example: \*this is emphasis\*</pre>
</div>
<div class="case" data-name="escaping em markdown with _">
<div class="input">To add emphasis, surround text with _. For example: _this is emphasis_</div>
<pre class="expected">To add emphasis, surround text with _. For example: \_this is emphasis\_</pre>
</div>
<div class="case" data-name="not escaping underscores surrounded by words">
<div class="input"><pre><code>def this_is_a_method; end;</code></pre></div>
<pre class="expected"> def this_is_a_method; end;</pre>
</div>
<div class="case" data-name="escaping strong markdown with *">
<div class="input">To add strong emphasis, surround text with **. For example: **this is strong**</div>
<pre class="expected">To add strong emphasis, surround text with **. For example: \*\*this is strong\*\*</pre>
</div>
<div class="case" data-name="escaping strong markdown with _">
<div class="input">To add strong emphasis, surround text with __. For example: __this is strong__</div>
<pre class="expected">To add strong emphasis, surround text with __. For example: \_\_this is strong\_\_</pre>
</div>
<div class="case" data-name="escaping hr markdown with *">
<div class="input">* * *</div>
<pre class="expected">\* \* \*</pre>
</div>
<div class="case" data-name="escaping hr markdown with -">
<div class="input">- - -</div>
<pre class="expected">\- \- \-</pre>
</div>
<div class="case" data-name="escaping hr markdown with _">
<div class="input">_ _ _</div>
<pre class="expected">\_ \_ \_</pre>
</div>
<div class="case" data-name="escaping hr markdown without spaces">
<div class="input">***</div>
<pre class="expected">\*\*\*</pre>
</div>
<div class="case" data-name="escaping hr markdown with more than 3 characters">
<div class="input">* * * * *</div>
<pre class="expected">\* \* \* \* \*</pre>
</div>
<div class="case" data-name="escaping ol markdown">
<div class="input">1984. by George Orwell</div>
<pre class="expected">1984\. by George Orwell</pre>
</div>
<div class="case" data-name="not escaping numbers in a sentence">
<div class="input">George Orwell wrote 1984.</div>
<pre class="expected">George Orwell wrote 1984.</pre>
</div>
<div class="case" data-name="escaping ul markdown *">
<div class="input">* An unordered list item</div>
<pre class="expected">\* An unordered list item</pre>
</div>
<div class="case" data-name="escaping ul markdown -">
<div class="input">- An unordered list item</div>
<pre class="expected">\- An unordered list item</pre>
</div>
<div class="case" data-name="escaping ul markdown +">
<div class="input">+ An unordered list item</div>
<pre class="expected">\+ An unordered list item</pre>
</div>
<div class="case" data-name="not escaping *">
<div class="input">You can use * for multiplication: 1.5 * 3 = 4.5</div>
<pre class="expected">You can use * for multiplication: 1.5 * 3 = 4.5</pre>
</div>
<div class="case" data-name="not escaping -">
<div class="input">45.5 - 3.5 = 42</div>
<pre class="expected">45.5 - 3.5 = 42</pre>
</div>
<div class="case" data-name="not escaping +">
<div class="input">+1</div>
<pre class="expected">+1</pre>
</div>
<div class="case" data-name="escaping >">
<div class="input">> Blockquote in markdown</div>
<pre class="expected">\> Blockquote in markdown</pre>
</div>
<div class="case" data-name="not escaping >">
<div class="input">42 > 1</div>
<pre class="expected">42 > 1</pre>
</div>
<div class="case" data-name="escaping code">
<div class="input">`not code`</div>
<pre class="expected">\`not code\`</pre>
</div>
<div class="case" data-name="escaping []">
<div class="input">[This] is a sentence with brackets</div>
<pre class="expected">\[This\] is a sentence with brackets</pre>
</div>
<div class="case" data-name="escaping [">
<div class="input"><a href="http://www.example.com">c[iao</a></div>
<pre class="expected">[c\[iao](http://www.example.com)</pre>
</div>
<!-- https://github.com/domchristie/to-markdown/issues/188#issuecomment-332216019 -->
<div class="case" data-name="escaping * performance">
<div class="input">fasdf *883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf</div>
<pre class="expected">fasdf *883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf</pre>
</div>
<div class="case" data-name="escaping multiple asterisks">
<div class="input"><p>* * ** It aims to be*</p></div>
<pre class="expected">\* \* \*\* It aims to be*</pre>
</div>
<div class="case" data-name="escaping delimiters around short words and numbers">
<div class="input"><p>_Really_? Is that what it _is_? A **2000** year-old computer?</p></div>
<pre class="expected">\_Really\_? Is that what it \_is\_? A \*\*2000\*\* year-old computer?</pre>
</div>
<div class="case" data-name="non-markdown block elements">
<div class="input">
Foo
<div>Bar</div>
Baz
</div>
<pre class="expected">Foo
Bar
Baz</pre>
</div>
<div class="case" data-name="non-markdown inline elements">
<div class="input">
Foo <span>Bar</span>
</div>
<pre class="expected">Foo Bar</pre>
</div>
<div class="case" data-name="blank inline elements">
<div class="input">
Hello <em></em>world
</div>
<pre class="expected">Hello world</pre>
</div>
<div class="case" data-name="elements with a single void element">
<div class="input">
<p><img src="http://example.com/logo.png" /></p>
</div>
<pre class="expected">![](http://example.com/logo.png)</pre>
</div>
<div class="case" data-name="elements with a nested void element">
<div class="input">
<p><span><img src="http://example.com/logo.png" /></span></p>
</div>
<pre class="expected">![](http://example.com/logo.png)</pre>
</div>
<!-- /TEST CASES -->
<script src="turndown-test.browser.js"></script>
</body>
</html>

View File

@ -0,0 +1,170 @@
const Attendant = require('turndown-attendant');
const TurndownService = require('../lib/turndown.cjs');
// Build the test harness from the index.html fixture located in this script's
// directory, handing it the TurndownService implementation under test.
const attendant = new Attendant({
	file: `${__dirname}/index.html`,
	TurndownService,
});
// `test` is used below to register the hand-written tests in this file.
const { test } = attendant;
// Presumably executes the cases declared in index.html; confirm against turndown-attendant.
attendant.run();
// Smoke test: feeds a badly structured document to turndown(). No assertions
// are planned, so the test only requires that the call completes.
test('malformed documents', (t) => {
	const malformedHtml = '<HTML><head></head><BODY><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><body onload=alert(document.cookie);></body></html>';
	t.plan(0);
	const service = new TurndownService();
	service.turndown(malformedHtml);
	t.end();
});
// turndown(null) must throw an error whose message matches "null is not a string".
test('null input', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const convertNull = () => {
		service.turndown(null);
	};
	t.throws(convertNull, /null is not a string/);
});
// turndown(undefined) must throw an error whose message matches
// "undefined is not a string".
test('undefined input', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const convertUndefined = () => {
		service.turndown(undefined);
	};
	t.throws(convertUndefined, /undefined is not a string/);
});
// addRule() must return the service it was called on.
test('#addRule returns the instance', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const strikethroughRule = {
		filter: ['del', 's', 'strike'],
		replacement: (content) => `~~${content}~~`,
	};
	const returned = service.addRule('strikethrough', strikethroughRule);
	t.equal(returned, service);
});
// Verifies that TurndownService#addRule forwards its key and rule object to
// the `add` method of the service's `rules` collection.
test('#addRule adds the rule', function(t) {
	t.plan(2);
	const turndownService = new TurndownService();
	const rule = {
		filter: ['del', 's', 'strike'],
		replacement: function(content) {
			return `~~${content}~~`;
		},
	};
	// Replace rules#add with a stub so the forwarded arguments can be inspected.
	turndownService.rules.add = function(key, r) {
		t.equal(key, 'strikethrough');
		// Received value first, expected value second, so that a failure report
		// labels "actual" and "expected" correctly.
		t.equal(r, rule);
	};
	turndownService.addRule('strikethrough', rule);
});
// use() must return the service it was called on.
test('#use returns the instance for chaining', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const noopPlugin = function plugin() {};
	const returned = service.use(noopPlugin);
	t.equal(returned, service);
});
// A plugin function passed to use() must be called with the service itself.
test('#use with a single plugin calls the fn with instance', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const plugin = (received) => {
		t.equal(received, service);
	};
	service.use(plugin);
});
// When use() is given an array of plugins, each one must be called with the
// service itself (two planned assertions, one per plugin).
test('#use with multiple plugins calls each fn with instance', (t) => {
	t.plan(2);
	const service = new TurndownService();
	const firstPlugin = (received) => {
		t.equal(received, service);
	};
	const secondPlugin = (received) => {
		t.equal(received, service);
	};
	service.use([firstPlugin, secondPlugin]);
});
// Converts the same markup before and after keep(['del', 'ins']) to show that
// kept elements are emitted as raw HTML instead of being unwrapped.
test('#keep keeps elements as HTML', (t) => {
	t.plan(2);
	const service = new TurndownService();
	const input = '<p>Hello <del>world</del><ins>World</ins></p>';

	// Before keep(): only the text content of del/ins survives.
	const withoutKeep = service.turndown(input);
	t.equal(withoutKeep, 'Hello worldWorld');

	// After keep(): the del/ins tags are preserved verbatim.
	service.keep(['del', 'ins']);
	const withKeep = service.turndown(input);
	t.equal(withKeep, 'Hello <del>world</del><ins>World</ins>');
});
// keep() must return the service it was called on.
test('#keep returns the TurndownService instance for chaining', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const returned = service.keep(['del', 'ins']);
	t.equal(returned, service);
});
// Calling keep('p') must not change how a paragraph is converted: the output
// is still the plain paragraph text.
test('keep rules are overridden by the standard rules', (t) => {
	t.plan(1);
	const service = new TurndownService();
	service.keep('p');
	const markdown = service.turndown('<p>Hello world</p>');
	t.equal(markdown, 'Hello world');
});
// A custom keepReplacement option controls the output for kept elements; here
// it wraps each kept node's outerHTML in blank lines.
test('keepReplacement can be customised', (t) => {
	t.plan(1);
	// `content` is unused but kept to show the callback's positional signature.
	const wrapInBlankLines = (content, node) => `\n\n${node.outerHTML}\n\n`;
	const service = new TurndownService({ keepReplacement: wrapInBlankLines });
	service.keep(['del', 'ins']);
	const markdown = service.turndown('<p>Hello <del>world</del><ins>World</ins></p>');
	t.equal(markdown, 'Hello \n\n<del>world</del>\n\n<ins>World</ins>');
});
// Converts the same markup before and after remove('del') to show that
// removed elements contribute nothing to the output.
test('#remove removes elements', (t) => {
	t.plan(2);
	const service = new TurndownService();
	const input = '<del>Please redact me</del>';

	// Before remove(): the element's text content is emitted.
	const beforeRemove = service.turndown(input);
	t.equal(beforeRemove, 'Please redact me');

	// After remove(): the element and its content are dropped.
	service.remove('del');
	const afterRemove = service.turndown(input);
	t.equal(afterRemove, '');
});
// remove() must return the service it was called on.
test('#remove returns the TurndownService instance for chaining', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const returned = service.remove(['del', 'ins']);
	t.equal(returned, service);
});
// Calling remove('p') must not change how a paragraph is converted: the
// output is still the plain paragraph text.
test('remove elements are overridden by rules', (t) => {
	t.plan(1);
	const service = new TurndownService();
	service.remove('p');
	const markdown = service.turndown('<p>Hello world</p>');
	t.equal(markdown, 'Hello world');
});
// When the same elements are both kept and removed, keep wins: the del/ins
// tags appear verbatim in the output.
test('remove elements are overridden by keep', (t) => {
	t.plan(1);
	const service = new TurndownService();
	const tags = ['del', 'ins'];
	service.keep(tags);
	service.remove(tags);
	const markdown = service.turndown('<p>Hello <del>world</del><ins>World</ins></p>');
	t.equal(markdown, 'Hello <del>world</del><ins>World</ins>');
});