/*!
 * Copyright (c) 2006 js-markdown-extra developers
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

var MARKDOWN_VERSION = "1.0.1o";
var MARKDOWNEXTRA_VERSION = "1.2.5";

// Global default settings:

/** Suffix used to close empty elements. Change to ">" for HTML output. */
var MARKDOWN_EMPTY_ELEMENT_SUFFIX = " />";

/** Define the width of a tab for code blocks. */
var MARKDOWN_TAB_WIDTH = 4;

/** Optional title attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_TITLE = "";
var MARKDOWN_FN_BACKLINK_TITLE = "";

/** Optional class attribute for footnote links and backlinks. */
var MARKDOWN_FN_LINK_CLASS = "";
var MARKDOWN_FN_BACKLINK_CLASS = "";

/** Change to false to remove Markdown from posts and/or comments. */
var MARKDOWN_WP_POSTS = true;
var MARKDOWN_WP_COMMENTS = true;

/**
 * Standard Function Interface: name of the parser constructor that
 * Markdown() instantiates.
 *
 * Declared with `var` like the other settings; a bare assignment creates an
 * implicit global and throws a ReferenceError in strict-mode/module code.
 */
var MARKDOWN_PARSER_CLASS = 'MarkdownExtra_Parser';

/**
 * Converts Markdown formatted text to HTML.
 *
 * The parser named by MARKDOWN_PARSER_CLASS is created and initialised on the
 * first call, cached on the function itself as `Markdown.parser`, and reused
 * by every later call.
 *
 * @param text Markdown text
 * @return HTML (whatever the parser's transform() returns)
 */
function Markdown(text) {
    // Refer to the function by name: `arguments.callee` throws in strict mode.
    var parser = Markdown.parser;
    if ('undefined' === typeof parser) {
        // NOTE(review): eval resolves the constructor from the configured
        // class name in the enclosing scope; MARKDOWN_PARSER_CLASS must never
        // be set from untrusted input.
        parser = eval("new " + MARKDOWN_PARSER_CLASS + "()");
        parser.init();
        Markdown.parser = parser;
    }
    // Transform text using parser.
    return parser.transform(text);
}

/**
 * Constructor function. Initialize appropriate member variables.
 *
 * Each gamut is a list of [methodName, priority] pairs; init() sorts them by
 * ascending priority before they are run.
 */
function Markdown_Parser() {
    // Builds the [marker, pattern] table for an emphasis token made of
    // `count` repeated "*" or "_" characters.
    function relistFor(count) {
        var stars = '';
        var unders = '';
        var starsRe = '';
        for (var k = 0; k < count; k++) {
            stars += '*';
            unders += '_';
            starsRe += '\\*';
        }
        return [
            ['', '(?:(^|[^\\*])(' + starsRe + ')(?=[^\\*])|(^|[^_])(' + unders + ')(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
            [stars, '((?:\\S|^)[^\\*])(' + starsRe + ')(?!\\*)'],
            [unders, '((?:\\S|^)[^_])(' + unders + ')(?!_)']
        ];
    }

    this.nested_brackets_depth = 6;
    this.nested_url_parenthesis_depth = 4;
    this.escape_chars = '\\\\`*_{}[]()>#+-.!';

    // Document-level transformations.
    this.document_gamut = [
        ['stripLinkDefinitions', 20], // strip link definitions, store in hashes
        ['runBasicBlockGamut', 30]
    ];

    // Transformations that form block-level tags like paragraphs, headers,
    // and list items.
    this.block_gamut = [
        ['doHeaders', 10],
        ['doHorizontalRules', 20],
        ['doLists', 40],
        ['doCodeBlocks', 50],
        ['doBlockQuotes', 60]
    ];

    // Transformations that occur *within* block-level tags like paragraphs,
    // headers, and list items.
    this.span_gamut = [
        // Character escapes, code spans, and inline HTML in one shot.
        ['parseSpan', -30],
        // Images must come before anchors, because ![foo][f] looks like an anchor.
        ['doImages', 10],
        ['doAnchors', 20],
        // Links out of things like `<http://example.com/>`; must come after
        // doAnchors, because < and > can delimit inline links: [this](<url>).
        ['doAutoLinks', 30],
        ['encodeAmpsAndAngles', 40],
        ['doItalicsAndBold', 50],
        ['doHardBreaks', 60]
    ];

    // Emphasis pattern tables for one, two and three marker characters.
    this.em_relist = relistFor(1);
    this.strong_relist = relistFor(2);
    this.em_strong_relist = relistFor(3);
}

/**
 * Second-stage initialisation: prepares the emphasis tables, the helper
 * regex fragments, the configuration defaults and the per-document state,
 * and sorts the three gamuts by ascending priority.
 */
Markdown_Parser.prototype.init = function() {
    // No detab initialisation is needed: JavaScript string length is already
    // based on Unicode.
    this.prepareItalicsAndBold();

    // [porting note] The PHP original builds these two patterns with atomic
    // groups repeated up to a maximum nesting depth. JavaScript has no atomic
    // groups, so simplified patterns are used instead.

    // Link text: either one bracketed run or a run without brackets.
    this.nested_brackets_re = '(?:\\[[^\\]]*\]|[^\\[\\]]*)';

    // URL: any run of non-space, non-parenthesis characters and one level of
    // balanced parentheses. Whitespace must be excluded; otherwise the URL
    // swallows an inline title such as `(url "title")`. Each repetition
    // consumes either a single character or a whole parenthesised group, so
    // matching stays linear.
    this.nested_url_parenthesis_re = '(?:[^\\(\\)\\s]|\\([^\\(\\)\\s]*\\))*';

    // Character class matching any escapable character.
    var tmp = [];
    for (var i = 0; i < this.escape_chars.length; i++) {
        tmp.push(this._php_preg_quote(this.escape_chars.charAt(i)));
    }
    this.escape_chars_re = '[' + tmp.join('') + ']';

    // Change to ">" for HTML output.
    this.empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
    this.tab_width = MARKDOWN_TAB_WIDTH;

    // Change to `true` to disallow markup or entities.
    this.no_markup = false;
    this.no_entities = false;

    // Predefined urls and titles for reference links and images.
    this.predef_urls = {};
    this.predef_titles = {};

    // Sort document, block, and span gamut in ascending priority order.
    function cmp_gamut(a, b) {
        a = a[1]; b = b[1];
        return a > b ? 1 : a < b ? -1 : 0;
    }
    this.document_gamut.sort(cmp_gamut);
    this.block_gamut.sort(cmp_gamut);
    this.span_gamut.sort(cmp_gamut);

    // Internal hashes used during transformation.
    this.urls = {};
    this.titles = {};
    this.html_hashes = {};

    // Status flag to avoid invalid nesting.
    this.in_anchor = false;
};

/**
 * [porting note]
 * JavaScript's RegExp has no \A and \Z anchors, so a multiline pattern
 * cannot pin the start/end of the whole text. Instead the text is framed
 * with STX (0x02) and ETX (0x03); a marker already present at its end of
 * the string is not added a second time.
 */
Markdown_Parser.prototype.__wrapSTXETX__ = function(text) {
    var prefixed = (text.charAt(0) == '\x02' ? '' : '\x02') + text;
    var endsWithEtx = prefixed.charAt(prefixed.length - 1) == '\x03';
    return endsWithEtx ? prefixed : prefixed + '\x03';
};

/**
 * [porting note]
 * Removes a leading STX (0x02) and a trailing ETX (0x03), each only when it
 * is present.
 */
Markdown_Parser.prototype.__unwrapSTXETX__ = function(text) {
    var rest = text.charAt(0) == '\x02' ? text.slice(1) : text;
    var last = rest.length - 1;
    return rest.charAt(last) == '\x03' ? rest.slice(0, last) : rest;
};

/**
 * Escapes regular-expression metacharacters in `text` so that it can be
 * embedded in a pattern (or a character class) and match literally.
 *
 * Escaped characters: / . * + ? | ( ) [ ] { } \ ^ $ -
 * The hyphen is included because the result is placed inside a character
 * class by init(), where an unescaped "-" would form a range.
 *
 * @param text string to quote
 * @return quoted string
 */
Markdown_Parser.prototype._php_preg_quote = function(text) {
    return text.replace(/[\/.*+?|()\[\]{}\\^$\-]/g, '\\$&');
};

/**
 * Returns `str` concatenated `n` times, like PHP's str_repeat().
 *
 * @param str string to repeat
 * @param n   number of copies; zero or a negative count yields ''
 * @return repeated string
 */
Markdown_Parser.prototype._php_str_repeat = function(str, n) {
    var repeated = '';
    // Counting from zero makes n <= 0 produce an empty string rather than
    // one copy of `str`.
    for (var i = 0; i < n; i++) {
        repeated += str;
    }
    return repeated;
};

/**
 * Strips the characters of `charlist` from both ends of `target`.
 *
 * @param target   string to trim
 * @param charlist characters to strip; defaults to space, tab, LF and CR.
 *                 It is inserted verbatim into a regex character class, so
 *                 callers must not pass unescaped class metacharacters.
 * @return trimmed string
 */
Markdown_Parser.prototype._php_trim = function(target, charlist) {
    var run = '[' + (charlist || " \t\n\r") + ']*';
    var edges = new RegExp('^' + run + '|' + run + '$', 'g');
    return target.replace(edges, '');
};

/**
 * Strips the characters of `charlist` from the end of `target`.
 *
 * @param target   string to trim
 * @param charlist characters to strip; defaults to space, tab, LF and CR.
 *                 It is inserted verbatim into a regex character class, so
 *                 callers must not pass unescaped class metacharacters.
 * @return right-trimmed string
 */
Markdown_Parser.prototype._php_rtrim = function(target, charlist) {
    var strippable = charlist || " \t\n\r";
    var trailing = new RegExp('[' + strippable + ']*$', 'g');
    return target.replace(trailing, '');
};

/**
 * Encodes "&", "<" and ">" as HTML entities, leaving quotes untouched
 * (PHP's htmlspecialchars() with ENT_NOQUOTES).
 *
 * @param str text to encode
 * @return encoded text
 */
Markdown_Parser.prototype._php_htmlspecialchars_ENT_NOQUOTES = function(str) {
    // "&" must be encoded first so the entities produced below are not
    // themselves re-encoded.
    return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
};

/**
 * Called before the transformation process starts to setup parser
 * states.
 *
 * The working url/title tables start as *copies* of the predefined ones.
 * Assigning the predefined objects directly would alias them, so every link
 * definition stored during a transform would be written into `predef_urls`
 * and `predef_titles` and leak into later documents.
 */
Markdown_Parser.prototype.setup = function() {
    // Shallow copy of an object's own enumerable properties.
    function copyOf(source) {
        var copy = {};
        for (var key in source) {
            if (Object.prototype.hasOwnProperty.call(source, key)) {
                copy[key] = source[key];
            }
        }
        return copy;
    }

    // Clear global hashes.
    this.urls = copyOf(this.predef_urls);
    this.titles = copyOf(this.predef_titles);
    this.html_hashes = {};

    this.in_anchor = false;
};

/**
 * Called after the transformation process to drop the per-document tables
 * (urls, titles, hashed HTML), which may otherwise keep memory alive
 * unnecessarily. Each table is replaced by its own fresh empty object.
 */
Markdown_Parser.prototype.teardown = function() {
    var parser = this;
    ['urls', 'titles', 'html_hashes'].forEach(function(table) {
        parser[table] = {};
    });
};

/**
 * Main function. Performs some preprocessing on the input text
 * and passes it through the document gamut.
 *
 * @param text Markdown source
 * @return transformed text followed by a single newline
 */
Markdown_Parser.prototype.transform = function(text) {
    this.setup();

    // Remove a leading BOM (as U+FEFF, or as its three UTF-8 bytes read as
    // single characters) and *every* \x1A in the input: \x1A is the marker
    // used inside hash tokens, so none may survive from the source text.
    text = text.replace(/^\uFEFF|^\xEF\xBB\xBF|\x1A/g, "");

    // Standardize line endings:
    //   DOS to Unix and Mac to Unix
    text = text.replace(/\r\n?/g, "\n");

    // Make sure text ends with a couple of newlines:
    text += "\n\n";

    // Convert all tabs to spaces.
    text = this.detab(text);

    // Turn block-level HTML blocks into hash entries
    text = this.hashHTMLBlocks(text);

    // Strip any lines consisting only of spaces.
    // This makes subsequent regexen easier to write, because we can
    // match consecutive blank lines with /\n+/ instead of something
    // contorted like /[ ]*\n+/ . The `g` flag is required so that every
    // such line is stripped, not just the first one.
    text = text.replace(/^[ ]+$/mg, "");

    // Run document gamut methods.
    for (var i = 0; i < this.document_gamut.length; i++) {
        var method = this[this.document_gamut[i][0]];
        if (method) {
            text = method.call(this, text);
        }
        else {
            console.log(this.document_gamut[i][0] + ' not implemented');
        }
    }

    this.teardown();

    return text + "\n";
};

/**
 * Replaces block-level HTML (block tags, <hr>, standalone comments and
 * <? ?> / <% %> processing instructions) with hash tokens so that later
 * passes leave the markup untouched.
 *
 * Returns `text` unchanged when `no_markup` is set.
 *
 * @param text document text
 * @return text with each matched block replaced by "\n\n<token>\n\n"
 */
Markdown_Parser.prototype.hashHTMLBlocks = function(text) {
    if (this.no_markup) { return text; }

    var less_than_tab = this.tab_width - 1;

    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    //
    // *  List "a" is made of tags which can be both inline or block-level.
    //    These will be treated block-level when the start tag is alone on
    //    its line, otherwise they're not matched here and will be taken as
    //    inline later.
    // *  List "b" is made of tags which are always block-level;
    var block_tags_a_re = 'ins|del';
    var block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' +
        'script|noscript|form|fieldset|iframe|math';

    // Regular expression for the content of a block tag.
    var nested_tags_level = 4;
    var attr =
        '(?:' +                 // optional tag attributes
            '\\s' +             // starts with whitespace
            '(?:' +
                '[^>"/]+' +     // text outside quotes
            '|' +
                '/+(?!>)' +     // slash not followed by ">"
            '|' +
                '"[^"]*"' +     // text inside double quotes (tolerate ">")
            '|' +
                '\'[^\']*\'' +  // text inside single quotes (tolerate ">")
            ')*' +
        ')?';
    var content =
        this._php_str_repeat(
            '(?:' +
                '[^<]+' +       // content without tag
            '|' +
                '<\\2' +        // nested opening tag
                attr +          // attributes
                '(?:' +
                    '/>' +
                '|' +
                    '>',
            nested_tags_level
        ) +                     // end of opening tag
        '.*?' +                 // last level nested tag content
        this._php_str_repeat(
                    '</\\2\\s*>' +      // closing nested tag
                ')' +
            '|' +
                '<(?!/\\2\\s*>)' +      // other tags with a different name
            ')*',
            nested_tags_level
        );

    // Same pattern for the group-a branch, whose tag name is captured in $3.
    // Every backreference must be rewritten; String.replace with a string
    // pattern would only touch the first one.
    var content2 = content.split('\\2').join('\\3');

    // First, look for nested blocks, e.g.:
    //  <div>
    //      <div>
    //      tags for inner block must be indented.
    //      </div>
    //  </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    var all = new RegExp('(?:' +
        '(?:' +
            '(?:\\n\\n)' +          // Starting after a blank line
        '|' +                       // or
            '(?:\\x02)\\n?' +       // the beginning of the doc
        ')' +
        '(' +                       // save in $1

            // Match from `\n<tag>` to `</tag>\n`, handling nested tags
            // in between.
            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_b_re + ')' +  // start tag = $2
            attr + '>' +                    // attributes followed by >
            content +                       // content, support nesting
            '</\\2>' +                      // the matching end tag
            '[ ]*' +                        // trailing spaces
            '(?=\\n+|\\n*\\x03)' +          // followed by a newline or end of document

        '|' +   // Special version for tags of group a.

            '[ ]{0,' + less_than_tab + '}' +
            '<(' + block_tags_a_re + ')' +  // start tag = $3
            attr + '>[ ]*\\n' +             // attributes followed by > and \n
            content2 +                      // content, support nesting
            '</\\3>' +                      // the matching end tag
            '[ ]*' +                        // trailing spaces
            '(?=\\n+|\\n*\\x03)' +          // followed by a newline or end of document

        '|' +   // Special case just for <hr />. It was easier to make a special
                // case than to make the other regex more complicated.

            '[ ]{0,' + less_than_tab + '}' +
            '<(hr)' +                       // start tag = $4
            attr +                          // attributes
            '/?>' +                         // the matching end tag
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        '|' +   // Special case for standalone HTML comments:

            '[ ]{0,' + less_than_tab + '}' +
            '(?:' +
                // [\s\S] stands in for PHP's (?s: ... ) so that a comment
                // may span several lines.
                '<!--[\\s\\S]*?-->' +
            ')' +
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        '|' +   // PHP and ASP-style processor instructions (<? and <%)

            '[ ]{0,' + less_than_tab + '}' +
            '(?:' +
                '<([?%])' +                 // opening marker = $5
                '[\\s\\S]*?' +
                '\\5>' +                    // same marker closes the instruction
            ')' +
            '[ ]*' +
            '(?=\\n{2,}|\\n*\\x03)' +       // followed by a blank line or end of document

        ')' +
    ')', 'mig');

    // JavaScript has no \A / \z anchors, so the text is framed with STX/ETX
    // while the pattern runs.
    var self = this;
    text = this.__wrapSTXETX__(text);
    text = text.replace(all, function(match, block) {
        var key = self.hashBlock(block);
        return "\n\n" + key + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};

/**
 * Called whenever a tag must be hashed when a function inserts an atomic
 * element in the text stream. Passing text through this function gives
 * a unique text-token which will be reverted back when calling unhash.
 *
 * The boundary argument specifies what character should be used to surround
 * the token. By convention, "B" is used for block elements that need not
 * be wrapped into paragraph tags at the end, ":" is used for elements
 * that are word separators and "X" is used in the general case.
 *
 * The token counter is stored on the function itself (`hashPart.i`), so it
 * is shared by every parser instance. It is reached through the function's
 * own name because `arguments.callee` throws in strict mode.
 *
 * @param text     markup to store
 * @param boundary optional boundary character, defaults to "X"
 * @return the token that replaces `text`
 */
Markdown_Parser.prototype.hashPart = function hashPart(text, boundary) {
    if ('undefined' === typeof boundary) {
        boundary = 'X';
    }
    // Swap back any tag hash found in text so we do not have to `unhash`
    // multiple times at the end.
    text = this.unhash(text);

    // Then hash the block.
    if ('undefined' === typeof hashPart.i) {
        hashPart.i = 0;
    }
    var key = boundary + "\x1A" + (++hashPart.i) + boundary;
    this.html_hashes[key] = text;
    return key; // String that will replace the tag.
};

/**
 * Shortcut for hashPart() that uses the block-level boundary character "B".
 *
 * @param text markup to store
 * @return the token that replaces `text`
 */
Markdown_Parser.prototype.hashBlock = function(text) {
    var blockBoundary = 'B';
    return this.hashPart(text, blockBoundary);
};

/**
 * Strips link definitions from text, stores the URLs and titles in
 * hash references.
 *
 * Link defs are in the form: ^[id]: url "optional title"
 * Ids are stored lower-cased in `this.urls` / `this.titles`; the title entry
 * is `undefined` when the definition has no title.
 *
 * @param text document text
 * @return text with the definitions removed
 */
Markdown_Parser.prototype.stripLinkDefinitions = function(text) {
    var less_than_tab = this.tab_width - 1;
    var self = this;

    var definition = new RegExp([
        // Start of a line, or the STX marker that frames the text: "^"
        // cannot match right after STX, so without the second alternative a
        // definition on the first line would never be recognised.
        '(?:^|\\x02)',
        '[ ]{0,' + less_than_tab + '}\\[(.+)\\][ ]?:',  // id = $1
        '[ ]*',
        '\\n?',                 // maybe *one* newline
        '[ ]*',
        '(?:',
            '<(.+?)>',          // url = $2
        '|',
            '(\\S+?)',          // url = $3
        ')',
        '[ ]*',
        '\\n?',                 // maybe one newline
        '[ ]*',
        '(?:',
            '["\\(]',
            '(.*?)',            // title = $4
            '["\\)]',
            '[ ]*',
        ')?',                   // title is optional
        '(?:\\n+|\\n*(?=\\x03))'
    ].join(''), 'mg');

    text = this.__wrapSTXETX__(text);
    text = text.replace(definition, function(match, id, url2, url3, title) {
        var link_id = id.toLowerCase();
        self.urls[link_id] = url2 ? url2 : url3;
        self.titles[link_id] = title;
        return ''; // String that will replace the block
    });
    text = this.__unwrapSTXETX__(text);
    return text;
};

/**
 * Run block gamut transformations.
 *
 * Raw HTML in the Markdown source has to be hashed before anything else,
 * and this has to happen for every block rather than once for the whole
 * document: hashed blocks can be part of list items and may have been
 * indented, in which case an earlier hashHTMLBlocks pass would have seen
 * them as code blocks.
 */
Markdown_Parser.prototype.runBlockGamut = function(text) {
    var hashed = this.hashHTMLBlocks(text);
    return this.runBasicBlockGamut(hashed);
};

/**
 * Run block gamut transformations, without hashing HTML blocks. This is
 * useful when HTML blocks are known to be already hashed, like in the first
 * whole-document pass.
 *
 * Each entry of `block_gamut` names a method of this parser; a name with no
 * matching method is reported on the console and skipped. The result is
 * finally passed to formParagraphs().
 */
Markdown_Parser.prototype.runBasicBlockGamut = function(text) {
    for (var idx = 0; idx < this.block_gamut.length; idx += 1) {
        var name = this.block_gamut[idx][0];
        var step = this[name];
        if (!step) {
            console.log(name + ' not implemented');
            continue;
        }
        text = step.call(this, text);
    }
    // Finally form paragraph and restore hashed blocks.
    return this.formParagraphs(text);
};

/**
 * Do Horizontal Rules:
 * a line made of three or more "-", "*" or "_" markers (all the same
 * character, optionally separated by up to two spaces and indented by up to
 * three) becomes a hashed <hr> element surrounded by newlines.
 */
Markdown_Parser.prototype.doHorizontalRules = function(text) {
    var parser = this;
    //           indent   marker    (gap + same marker) x2+  trailing spaces
    var rule = /^[ ]{0,3}([-\*_])(?:[ ]{0,2}\1){2,}[ ]*$/mg;
    return text.replace(rule, function() {
        var hr = "<hr" + parser.empty_element_suffix;
        return "\n" + parser.hashBlock(hr) + "\n";
    });
};

/**
 * Run span gamut transformations.
 *
 * Each entry of `span_gamut` names a method of this parser; the methods are
 * applied in list order. A name with no matching method is reported on the
 * console and skipped.
 */
Markdown_Parser.prototype.runSpanGamut = function(text) {
    for (var idx = 0; idx < this.span_gamut.length; idx += 1) {
        var name = this.span_gamut[idx][0];
        var step = this[name];
        if (!step) {
            console.log(name + ' not implemented');
            continue;
        }
        text = step.call(this, text);
    }
    return text;
};

/**
 * Do hard breaks:
 * two or more spaces followed by a newline become a hashed <br> element
 * followed by a newline.
 */
Markdown_Parser.prototype.doHardBreaks = function(text) {
    var parser = this;
    var hardBreak = / {2,}\n/mg;
    return text.replace(hardBreak, function() {
        var br = "<br" + parser.empty_element_suffix + "\n";
        return parser.hashPart(br);
    });
};

/**
 * Turn Markdown link shortcuts into XHTML <a> tags.
 *
 * Handles, in this order: reference-style links `[text] [id]`, inline links
 * `[text](url "optional title")`, and reference shortcuts `[text]`.
 * Returns `text` untouched when already inside an anchor (`in_anchor`), so
 * links are never nested.
 *
 * @param text span-level text
 * @return text with every recognised link replaced by a hash token
 */
Markdown_Parser.prototype.doAnchors = function(text) {
    if (this.in_anchor) { return text; }
    this.in_anchor = true;

    var self = this;

    // Look an id up among a table's *own* entries only, so that ids such as
    // "constructor" do not resolve to members inherited from Object.prototype.
    function lookup(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    }

    var _doAnchors_reference_callback = function(match, whole_match, link_text, link_id) {
        // For the shortcut pattern the fourth argument is the match offset
        // (a number), hence the type check.
        if (typeof(link_id) !== 'string' || link_id === '') {
            // for shortcut links like [this][] or [this].
            link_id = link_text;
        }

        // lower-case and turn embedded newlines into spaces
        link_id = link_id.toLowerCase();
        link_id = link_id.replace(/[ ]?\n/g, ' ');

        var url = lookup(self.urls, link_id);
        if ('undefined' === typeof url) {
            // No such link id: leave the text intact.
            return whole_match;
        }

        var result = "<a href=\"" + self.encodeAttribute(url) + "\"";
        var title = lookup(self.titles, link_id);
        if ('undefined' !== typeof title) {
            result += " title=\"" + self.encodeAttribute(title) + "\"";
        }

        link_text = self.runSpanGamut(link_text);
        result += ">" + link_text + "</a>";
        return self.hashPart(result);
    };

    try {
        //
        // First, handle reference-style links: [link text] [id]
        //
        // [porting note] Each pattern is only run when the text contains the
        // literal joint it requires ("][" here, "](" below). This is a
        // necessary condition for a match, so it can skip work but never
        // suppress a valid link.
        if (/\][ ]?(?:\n[ ]*)?\[/.test(text)) {
            text = text.replace(new RegExp(
                '(' +                   // wrap whole match in $1
                    '\\[' +
                        '(' + this.nested_brackets_re + ')' +   // link text = $2
                    '\\]' +

                    '[ ]?' +            // one optional space
                    '(?:\\n[ ]*)?' +    // one optional newline followed by spaces

                    '\\[' +
                        '(.*?)' +       // id = $3
                    '\\]' +
                ')',
                'mg'
            ), _doAnchors_reference_callback);
        }

        //
        // Next, inline-style links: [link text](url "optional title")
        //
        if (text.indexOf('](') !== -1) {
            text = text.replace(new RegExp(
                '(' +                   // wrap whole match in $1
                    '\\[' +
                        '(' + this.nested_brackets_re + ')' +   // link text = $2
                    '\\]' +
                    '\\(' +             // literal paren
                        '[ \\n]*' +
                        '(?:' +
                            '<(.+?)>' +     // href = $3
                        '|' +
                            '(' + this.nested_url_parenthesis_re + ')' +    // href = $4
                        ')' +
                        '[ \\n]*' +
                        '(' +               // $5
                            '([\'"])' +     // quote char = $6
                            '(.*?)' +       // Title = $7
                            '\\6' +         // matching quote
                            '[ \\n]*' +     // ignore any spaces between closing quote and )
                        ')?' +              // title is optional
                    '\\)' +
                ')',
                'mg'
            ), function(match, whole_match, link_text, url3, url4, x0, x1, title) {
                var url = self.encodeAttribute(url3 ? url3 : url4);

                var result = "<a href=\"" + url + "\"";
                if ('undefined' !== typeof title && title !== '') {
                    result += " title=\"" + self.encodeAttribute(title) + "\"";
                }

                // The link text goes through the span gamut exactly once.
                link_text = self.runSpanGamut(link_text);
                result += ">" + link_text + "</a>";

                return self.hashPart(result);
            });
        }

        //
        // Last, handle reference-style shortcuts: [link text]
        // These must come last in case you've also got [link text][1]
        // or [link text](/foo)
        //
        text = text.replace(new RegExp(
            '(' +                   // wrap whole match in $1
                '\\[' +
                    '([^\\[\\]]+)' +    // link text = $2; can't contain [ or ]
                '\\]' +
            ')',
            'mg'
        ), _doAnchors_reference_callback);
    }
    finally {
        // Always leave anchor mode, even when a helper throws.
        this.in_anchor = false;
    }

    return text;
};

/**
 * Turn Markdown image shortcuts into <img> tags.
 *
 * Handles reference-style images `![alt text][id]` first, then inline
 * images `![alt text](url "optional title")`.
 *
 * @param text span-level text
 * @return text with every recognised image replaced by a hash token
 */
Markdown_Parser.prototype.doImages = function(text) {
    var self = this;

    // Look an id up among a table's *own* entries only, so that ids such as
    // "constructor" do not resolve to members inherited from Object.prototype.
    function lookup(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    }

    //
    // First, handle reference-style labeled images: ![alt text][id]
    //
    // [porting note] Each pattern is only run when the text contains the
    // literal pieces it requires. These are necessary conditions for a
    // match, so they can skip work but never suppress a valid image.
    if (text.indexOf('![') !== -1 && /\][ ]?(?:\n[ ]*)?\[/.test(text)) {
        text = text.replace(new RegExp(
            '(' +                   // wrap whole match in $1
                '!\\[' +
                    '(' + this.nested_brackets_re + ')' +   // alt text = $2
                '\\]' +

                '[ ]?' +            // one optional space
                '(?:\\n[ ]*)?' +    // one optional newline followed by spaces

                '\\[' +
                    '(.*?)' +       // id = $3
                '\\]' +
            ')',
            'mg'
        ), function(match, whole_match, alt_text, link_id) {
            link_id = link_id.toLowerCase();
            if (link_id === '') {
                link_id = alt_text.toLowerCase(); // for shortcut links like ![this][].
            }

            alt_text = self.encodeAttribute(alt_text);

            var url = lookup(self.urls, link_id);
            if ('undefined' === typeof url) {
                // If there's no such link ID, leave intact:
                return whole_match;
            }

            var result = "<img src=\"" + self.encodeAttribute(url) + "\" alt=\"" + alt_text + "\"";
            var title = lookup(self.titles, link_id);
            if ('undefined' !== typeof title) {
                result += " title=\"" + self.encodeAttribute(title) + "\"";
            }
            result += self.empty_element_suffix;
            return self.hashPart(result);
        });
    }

    //
    // Next, handle inline images: ![alt text](url "optional title")
    // Don't forget: encode * and _
    //
    if (text.indexOf('![') !== -1 && /\]\s?\(/.test(text)) {
        text = text.replace(new RegExp(
            '(' +                   // wrap whole match in $1
                '!\\[' +
                    '(' + this.nested_brackets_re + ')' +   // alt text = $2
                '\\]' +
                '\\s?' +            // One optional whitespace character
                '\\(' +             // literal paren
                    '[ \\n]*' +
                    '(?:' +
                        '<(\\S*)>' +    // src url = $3
                    '|' +
                        '(' + this.nested_url_parenthesis_re + ')' +    // src url = $4
                    ')' +
                    '[ \\n]*' +
                    '(' +               // $5
                        '([\'"])' +     // quote char = $6
                        '(.*?)' +       // title = $7
                        '\\6' +         // matching quote
                        '[ \\n]*' +
                    ')?' +              // title is optional
                '\\)' +
            ')',
            'mg'
        ), function(match, whole_match, alt_text, url3, url4, x5, x6, title) {
            // `<>` captures an empty $3 and leaves $4 unset; fall back to ''.
            var url = url3 || url4 || '';

            alt_text = self.encodeAttribute(alt_text);
            url = self.encodeAttribute(url);
            var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
            if ('undefined' !== typeof title && title !== '') {
                result += " title=\"" + self.encodeAttribute(title) + "\"";
            }
            result += self.empty_element_suffix;

            return self.hashPart(result);
        });
    }

    return text;
};

/**
 * Converts Setext-style and atx-style headers into hashed <h1>..<h6> blocks.
 *
 * @param text block-level text
 * @return text with each header replaced by "\n<token>\n\n"
 */
Markdown_Parser.prototype.doHeaders = function(text) {
    var parser = this;

    // Runs the header text through the span gamut and hashes the element.
    function headerBlock(level, span) {
        var html = "<h" + level + ">" + parser.runSpanGamut(span) + "</h" + level + ">";
        return "\n" + parser.hashBlock(html) + "\n\n";
    }

    // Setext-style headers:
    //    Header 1
    //    ========
    //
    //    Header 2
    //    --------
    //
    var setext = /^(.+?)[ ]*\n(=+|-+)[ ]*\n+/mg;
    var withSetext = text.replace(setext, function(match, span, line) {
        // Terrible hack to check we haven't found an empty list item.
        var emptyListItem = line == '-' && span.match(/^-(?: |$)/);
        if (emptyListItem) {
            return match;
        }
        return headerBlock(line.charAt(0) == '=' ? 1 : 2, span);
    });

    // atx-style headers:
    //  # Header 1
    //  ## Header 2
    //  ## Header 2 with closing hashes ##
    //  ...
    //  ###### Header 6
    //
    //         $1 = #'s     $2 = text   optional closing #'s (not counted)
    var atx = /^(\#{1,6})[ ]*(.+?)[ ]*\#*\n+/mg;
    return withSetext.replace(atx, function(match, hashes, span) {
        return headerBlock(hashes.length, span);
    });
};

/**
 * Form HTML ordered (numbered) and unordered (bulleted) lists.
 *
 * Two passes are made over the text: the first finds lists that start with
 * a bullet marker, the second lists that start with a number marker. Each
 * list found is handed to processListItems() and replaced by a hashed
 * <ul>/<ol> block.
 *
 * @param text block-level text
 * @return text with each list replaced by "\n<token>\n\n"
 */
Markdown_Parser.prototype.doLists = function(text) {
    var parser = this;
    var maxIndent = this.tab_width - 1;

    // Re-usable patterns to match list item bullets and number markers:
    var bulletRe = '[\\*\\+-]';
    var numberRe = '\\d+[\\.]';

    // Pattern matching one entire list that starts with `firstRe` markers
    // and ends before a list of the `otherRe` kind.
    function wholeListPattern(firstRe, otherRe) {
        return [
            '(',                                // $1 = whole list
                '(',                            // $2
                    '([ ]{0,' + maxIndent + '})',   // $3 = number of spaces
                    '(' + firstRe + ')',        // $4 = first list item marker
                    '[ ]+',
                ')',
                '[\\s\\S]+?',
                '(',                            // $5
                    '(?=\\x03)',                // \z
                '|',
                    '\\n{2,}',
                    '(?=\\S)',
                    '(?!',                      // Negative lookahead for another list item marker
                        '[ ]*',
                        firstRe + '[ ]+',
                    ')',
                '|',
                    '(?=',                      // Lookahead for another kind of list
                        '\\n',
                        '\\3',                  // Must have the same indentation
                        otherRe + '[ ]+',
                    ')',
                ')',
            ')'
        ].join('');
    }

    // Turns one matched list into a hashed <ul>/<ol> element.
    function replaceList(match, list, x2, x3, type) {
        var isBulleted = type.match(bulletRe);
        var tag = isBulleted ? "ul" : "ol";
        var itemMarkerRe = tag == "ul" ? bulletRe : numberRe;

        var items = parser.processListItems(list + "\n", itemMarkerRe);
        var hashed = parser.hashBlock("<" + tag + ">\n" + items + "</" + tag + ">");
        return "\n" + hashed + "\n\n";
    }

    var passes = [
        [bulletRe, numberRe],
        [numberRe, bulletRe]
    ];

    for (var pass = 0; pass < passes.length; pass++) {
        var listRe = wholeListPattern(passes[pass][0], passes[pass][1]);

        // We use a different prefix before nested lists than top-level lists.
        // See extended comment in processListItems().
        var prefix = this.list_level
            ? '^'
            : '(?:(?=\\n)\\n|\\x02\\n?)';       // Must eat the newline

        text = this.__wrapSTXETX__(text);
        text = text.replace(new RegExp(prefix + listRe, "mg"), replaceList);
        text = this.__unwrapSTXETX__(text);
    }

    return text;
};

// var $list_level = 0;
|
|
|
|
/**
 * Process the contents of a single ordered or unordered list, splitting it
 * into individual list items.
 *
 * @param {string} list_str       Text of one whole list, markers included.
 * @param {string} marker_any_re  Regex source matching a single list marker.
 * @returns {string} The list text with each item wrapped in `<li>...</li>`.
 */
Markdown_Parser.prototype.processListItems = function(list_str, marker_any_re) {
    // this.list_level keeps track of when we're inside a list. Each time we
    // enter a list we increment it; when we leave a list we decrement. If
    // it's zero, we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //     I recommend upgrading to version
    //     8. Oops, now this line is treated
    //     as a sub-list.
    //
    // as a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence. Inside a list (or sub-list) that
    // same line is treated as the start of a sub-list.
    if ('undefined' === typeof this.list_level) {
        this.list_level = 0;
    }
    this.list_level++;

    var self = this;
    var item_re = new RegExp(
        '(\\n)?' +                              // leading line = $1
        '([ ]*)' +                              // leading whitespace = $2
        '(' + marker_any_re +                   // list marker and space = $3
            '(?:[ ]+|(?=\\n))' +                // space only required if item is not empty
        ')' +
        '([\\s\\S]*?)' +                        // list item text = $4
        '(?:(\\n+(?=\\n))|\\n)' +               // trailing blank line = $5
        '(?=\\n*(\\x03|\\2(' + marker_any_re + ')(?:[ ]+|(?=\\n))))',
        "gm"
    );

    try {
        // STX/ETX stand in for \A and \z, which JS regexes do not have.
        list_str = this.__wrapSTXETX__(list_str);

        // Trim trailing blank lines.
        list_str = list_str.replace(/\n{2,}(?=\x03)/m, "\n");

        list_str = list_str.replace(item_re, function(match, leading_line, leading_space, marker_space, item, tailing_blank_line) {
            if (leading_line || tailing_blank_line || item.match(/\n{2,}/)) {
                // Loose item: replace the marker with equivalent indentation
                // and process the content as blocks.
                item = leading_space + self._php_str_repeat(' ', marker_space.length) + item;
                item = self.runBlockGamut(self.outdent(item) + "\n");
            }
            else {
                // Tight item: recurse for sub-lists, then process spans.
                item = self.doLists(self.outdent(item));
                item = item.replace(/\n+$/m, '');
                item = self.runSpanGamut(item);
            }
            return "<li>" + item + "</li>\n";
        });

        list_str = this.__unwrapSTXETX__(list_str);
    }
    finally {
        // Always leave the list, even if a nested transformation throws,
        // so the parser is not stuck "inside a list" afterwards.
        this.list_level--;
    }
    return list_str;
};
|
|
|
|
/**
 * Process Markdown `<pre><code>` blocks.
 *
 * Finds runs of lines indented by at least `this.tab_width` spaces, removes
 * one level of indentation, escapes HTML special characters and replaces the
 * run with a hashed `<pre><code>` block.
 *
 * @param {string} text  Block-level Markdown text.
 * @returns {string} Text with indented code blocks replaced by block hashes.
 */
Markdown_Parser.prototype.doCodeBlocks = function(text) {
    var self = this;
    var code_block_re = new RegExp(
        '(?:^|\\n\\n|(?=\\x02)\\n)?' +
        '(' +                                   // $1 = the code block -- one or more lines, starting with a space/tab
            '(?:' +
                '(?=(' +                        // lookahead + backreference emulates an atomic group
                    '[ ]{' + this.tab_width + ',}' + // Lines must start with a tab or a tab-width of spaces
                    '.*\\n+' +
                '))\\2' +
            ')+' +
        ')' +
        '((?=^[ ]{0,' + this.tab_width + '}\\S)|(?:\\n*(?=\\x03)))', // Lookahead for non-space at line-start, or end of doc
        'mg'
    );

    text = this.__wrapSTXETX__(text);
    text = text.replace(code_block_re, function(match, codeblock) {
        codeblock = self.outdent(codeblock);
        codeblock = self._php_htmlspecialchars_ENT_NOQUOTES(codeblock);

        // Trim leading and trailing newlines. Without the `m` flag, `^` and
        // `$` anchor to the whole string, so no STX/ETX wrapping is needed.
        codeblock = codeblock.replace(/^\n+|\n+$/g, '');

        codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
        return "\n\n" + self.hashBlock(codeblock) + "\n\n";
    });
    return this.__unwrapSTXETX__(text);
};
|
|
|
|
/**
 * Create a code span markup for `code`. Called from handleSpanToken.
 *
 * @param {string} code  Raw text found between the backtick markers.
 * @returns {string} Hash token standing in for the `<code>` element.
 */
Markdown_Parser.prototype.makeCodeSpan = function(code) {
    var escaped = this._php_htmlspecialchars_ENT_NOQUOTES(this._php_trim(code));
    return this.hashPart("<code>" + escaped + "</code>");
};
|
|
|
|
/**
 * Prepare regular expressions for searching emphasis tokens in any
 * context.
 *
 * For every combination of an `em_relist` entry and a `strong_relist` entry
 * this stores one RegExp in `this.em_strong_prepared_relist` under the key
 * `'rx_' + em + strong`. Each list entry is a `[marker, regexSource]` pair.
 */
Markdown_Parser.prototype.prepareItalicsAndBold = function() {
    var prepared = {};

    for (var i = 0; i < this.em_relist.length; i++) {
        var em = this.em_relist[i][0];
        var em_re = this.em_relist[i][1];

        for (var j = 0; j < this.strong_relist.length; j++) {
            var strong = this.strong_relist[j][0];
            var strong_re = this.strong_relist[j][1];

            // Construct list of allowed token expressions. Combined
            // em+strong expressions go first so the longest marker wins.
            var token_relist = [];
            for (var k = 0; k < this.em_strong_relist.length; k++) {
                if (em + strong == this.em_strong_relist[k][0]) {
                    token_relist.push(this.em_strong_relist[k][1]);
                }
            }
            token_relist.push(em_re);
            token_relist.push(strong_re);

            // Construct master expression from list.
            prepared['rx_' + em + strong] = new RegExp('(' + token_relist.join('|') + ')');
        }
    }

    this.em_strong_prepared_relist = prepared;
};
|
|
|
|
/**
 * Convert emphasis markers (`*`, `_`, `**`, `__`, `***`, `___`) into
 * `<em>` / `<strong>` elements.
 *
 * The text is scanned token by token with the expressions built by
 * prepareItalicsAndBold(). Two parallel stacks are kept: `token_stack`
 * holds the markers currently open and `text_stack` the text collected
 * inside each of them (index 0 is the innermost).
 *
 * @param {string} text  Span-level text.
 * @returns {string} Text with emphasis replaced by hashed HTML spans.
 */
Markdown_Parser.prototype.doItalicsAndBold = function(text) {
    var self = this;
    var em = '';
    var strong = '';
    var tree_char_em = false;
    var text_stack = [''];
    var token_stack = [];
    var token = '';
    var tag, span;

    // Run the span gamut on `inner`, wrap it in the given tags and hash it.
    function makeSpan(open, inner, close) {
        inner = self.runSpanGamut(inner);
        return self.hashPart(open + inner + close);
    }

    // Give up on the innermost open marker: put the marker back as literal
    // text in the parent, followed by the text collected after it.
    function unwindInnermost() {
        text_stack[1] += token_stack.shift();
        var inner_text = text_stack.shift();
        text_stack[0] += inner_text;
    }

    while (true) {
        // Get prepared regular expression for searching emphasis tokens
        // in current context.
        var token_re = this.em_strong_prepared_relist['rx_' + em + strong];

        // Each loop iteration searches for the next emphasis token.
        var parts = text.match(token_re);
        if (parts) {
            // The expressions capture (prefix, marker) pairs because JS had
            // no lookbehind; the prefix belongs to the text, not the token.
            var pre = "";
            var marker = parts[1];
            for (var mg = 2; mg < parts.length; mg += 2) {
                if ('undefined' !== typeof parts[mg] && parts[mg] !== '') {
                    pre = parts[mg];
                    marker = parts[mg + 1];
                    break;
                }
            }
            text_stack[0] += text.slice(0, parts.index) + pre;
            token = marker;
            text = text.slice(parts.index + parts[0].length);
        }
        else {
            text_stack[0] += text;
            token = '';
            text = '';
        }

        if (token == '') {
            // Reached end of text span: empty stack without emitting
            // any more emphasis.
            while (token_stack.length > 0 && token_stack[0].length > 0) {
                unwindInnermost();
            }
            break;
        }

        var token_len = token.length;
        if (tree_char_em) {
            // Reached closing marker while inside a three-char emphasis.
            if (token_len == 3) {
                // Three-char closing marker, close em and strong.
                token_stack.shift();
                span = makeSpan("<strong><em>", text_stack.shift(), "</em></strong>");
                text_stack[0] += span;
                em = '';
                strong = '';
            } else {
                // Other closing marker: close one em or strong and
                // change current token state to match the other.
                token_stack[0] = this._php_str_repeat(token.charAt(0), 3 - token_len);
                tag = token_len == 2 ? "strong" : "em";
                // The span replaces the collected text in place: the
                // remaining marker still encloses it.
                text_stack[0] = makeSpan("<" + tag + ">", text_stack[0], "</" + tag + ">");
                if (tag == 'strong') { strong = ''; } else { em = ''; }
            }
            tree_char_em = false;
        } else if (token_len == 3) {
            if (em != '') {
                // Reached closing marker for both em and strong: close the
                // two innermost spans, innermost first.
                for (var i = 0; i < 2; ++i) {
                    var shifted_token = token_stack.shift();
                    tag = shifted_token.length == 2 ? "strong" : "em";
                    span = makeSpan("<" + tag + ">", text_stack.shift(), "</" + tag + ">");
                    // Append (not assign): the parent may already hold text
                    // that preceded the span being closed.
                    text_stack[0] += span;
                    if (tag == 'strong') { strong = ''; } else { em = ''; }
                }
            } else {
                // Reached opening three-char emphasis marker. Push on token
                // stack; will be handled by the special condition above.
                em = token.charAt(0);
                strong = em + em;
                token_stack.unshift(token);
                text_stack.unshift('');
                tree_char_em = true;
            }
        } else if (token_len == 2) {
            if (strong != '') {
                // Unwind any dangling emphasis marker:
                if (token_stack[0].length == 1) {
                    unwindInnermost();
                }
                // Closing strong marker:
                token_stack.shift();
                span = makeSpan("<strong>", text_stack.shift(), "</strong>");
                text_stack[0] += span;
                strong = '';
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                strong = token;
            }
        } else {
            // Here token_len == 1
            if (em != '') {
                if (token_stack[0].length == 1) {
                    // Closing emphasis marker:
                    token_stack.shift();
                    span = makeSpan("<em>", text_stack.shift(), "</em>");
                    text_stack[0] += span;
                    em = '';
                } else {
                    text_stack[0] += token;
                }
            } else {
                token_stack.unshift(token);
                text_stack.unshift('');
                em = token;
            }
        }
    }
    return text_stack[0];
};
|
|
|
|
|
|
/**
 * Convert `>`-prefixed blocks into `<blockquote>` elements.
 *
 * The quoted text has one level of `>` removed and is then run through the
 * block gamut again, so nested quotes and other block elements work.
 *
 * @param {string} text  Block-level Markdown text.
 * @returns {string} Text with block quotes replaced by block hashes.
 */
Markdown_Parser.prototype.doBlockQuotes = function(text) {
    var self = this;
    var blockquote_re = new RegExp(
        '(' +                       // Wrap whole match in $1
            '(?:' +
                '^[ ]*>[ ]?' +      // ">" at the start of a line
                '.+\\n' +           // rest of the first line
                '(.+\\n)*' +        // subsequent consecutive lines
                '\\n*' +            // blanks
            ')+' +
        ')',
        'mg'
    );

    return text.replace(blockquote_re, function(match, bq) {
        // Trim one level of quoting and blank out whitespace-only lines.
        bq = bq.replace(/^[ ]*>[ ]?|^[ ]+$/mg, '');
        bq = self.runBlockGamut(bq); // recurse

        // Indent the quoted content.
        bq = bq.replace(/^/mg, " ");
        // These leading spaces cause problems with <pre> content,
        // so we need to remove them again from every line inside <pre>:
        bq = bq.replace(/(\s*<pre>[\s\S]+?<\/pre>)/g, function(pre_match, pre) {
            return pre.replace(/^ /mg, '');
        });

        return "\n" + self.hashBlock("<blockquote>\n" + bq + "\n</blockquote>") + "\n\n";
    });
};
|
|
|
|
/**
 * Wrap paragraphs in `<p>` tags and swap hashed blocks back in.
 *
 * The text is split on blank lines. A chunk that consists of exactly one
 * block hash (`B\x1A<digits>B`) is replaced by the stored HTML from
 * `this.html_hashes`; any other non-empty chunk is treated as a paragraph.
 *
 * @param {string} text  String to process with html <p> tags.
 * @returns {string} The chunks joined by blank lines.
 */
Markdown_Parser.prototype.formParagraphs = function(text) {
    // Strip leading and trailing newlines. Without the `m` flag `^` and `$`
    // anchor to the whole string.
    text = text.replace(/^\n+|\n+$/g, "");

    var grafs = text.split(/\n{2,}/);
    var output = [];

    // Wrap <p> tags and unhashify HTML blocks.
    for (var i = 0; i < grafs.length; i++) {
        var value = grafs[i];
        if (value === "") {
            // Skip empty chunks (stands in for PHP's PREG_SPLIT_NO_EMPTY).
            continue;
        }
        if (!value.match(/^B\x1A[0-9]+B$/)) {
            // Is a paragraph.
            value = this.runSpanGamut(value);
            value = value.replace(/^([ ]*)/, "<p>");
            value += "</p>";
            output.push(this.unhash(value));
        }
        else {
            // Is a block: emit the stored HTML as-is.
            // NOTE: the PHP original re-processed `<div markdown="1">`
            // blocks at this point; that branch is not ported here.
            output.push(this.html_hashes[value]);
        }
    }

    return output.join("\n\n");
};
|
|
|
|
/**
 * Encode text for a double-quoted HTML attribute. This function
 * is *not* suitable for attributes enclosed in single quotes.
 *
 * @param {string} text  Raw attribute value.
 * @returns {string} Value with ampersands, `<` and `"` encoded.
 */
Markdown_Parser.prototype.encodeAttribute = function(text) {
    text = this.encodeAmpsAndAngles(text);
    // A literal double quote would terminate the attribute.
    return text.replace(/"/g, '&quot;');
};
|
|
|
|
/**
 * Smart processing for ampersands and angle brackets that need to
 * be encoded. Valid character entities are left alone unless the
 * no-entities mode is set.
 *
 * @param {string} text  Text to encode.
 * @returns {string} Text with `&` and `<` replaced by `&amp;` and `&lt;`.
 */
Markdown_Parser.prototype.encodeAmpsAndAngles = function(text) {
    if (this.no_entities) {
        text = text.replace(/&/g, '&amp;');
    } else {
        // Ampersand-encoding based entirely on Nat Irons's Amputator
        // MT plugin: <http://bumppo.net/projects/amputator/>
        // Only ampersands that do not start an entity are encoded.
        text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g, '&amp;');
    }
    // Encode remaining <'s
    text = text.replace(/</g, '&lt;');

    return text;
};
|
|
|
|
/**
 * Turn `<http://example.com/>` and `<address@example.com>` into links.
 *
 * URLs with an http, https, ftp or dict scheme become plain anchors; email
 * addresses (optionally prefixed with `mailto:`) are passed to
 * encodeEmailAddress(). Every occurrence in the text is converted.
 *
 * @param {string} text  Span-level text.
 * @returns {string} Text with automatic links replaced by span hashes.
 */
Markdown_Parser.prototype.doAutoLinks = function(text) {
    var self = this;

    text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi, function(match, address) {
        var url = self.encodeAttribute(address);
        var link = "<a href=\"" + url + "\">" + url + "</a>";
        return self.hashPart(link);
    });

    // Email addresses: <address@domain.foo>
    var email_re = new RegExp(
        '<' +
        '(?:mailto:)?' +
        '(' +
            '(?:' +
                '[-!#$%&\'*+/=?^_`.{|}~\\w\\x80-\\xFF]+' +
            '|' +
                '".*?"' +
            ')' +
            '\\@' +
            '(?:' +
                '[-a-z0-9\\x80-\\xFF]+(\\.[-a-z0-9\\x80-\\xFF]+)*\\.[a-z]+' +
            '|' +
                '\\[[\\d.a-fA-F:]+\\]' + // IPv4 & IPv6
            ')' +
        ')' +
        '>',
        'gi'
    );
    text = text.replace(email_re, function(match, address) {
        var link = self.encodeEmailAddress(address);
        return self.hashPart(link);
    });

    return text;
};
|
|
|
|
/**
 * Input: an email address, e.g. "foo@example.com"
 *
 * Output: the email address as a mailto link, with each ASCII character
 * of the address encoded as either a decimal or hex character reference
 * (or, for roughly one in ten, left as-is), in the hopes of foiling most
 * address harvesting spam bots. The "@" is always encoded. The choice per
 * character is derived from a CRC-32 of the address, so the output for a
 * given address is always the same.
 *
 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
 * With some optimizations by Milian Wolff.
 *
 * @param {string} addr  Email address without the `mailto:` prefix.
 * @returns {string} `<a href="mailto:...">...</a>` with obfuscated text.
 */
Markdown_Parser.prototype.encodeEmailAddress = function encodeEmailAddress(addr) {
    // The lookup table is cached on the function itself. A named function
    // expression is used instead of arguments.callee, which throws in
    // strict mode and in ES modules.
    if ('undefined' === typeof encodeEmailAddress.crctable) {
        // Standard CRC-32 table (reflected polynomial 0xEDB88320).
        var table = [];
        for (var n = 0; n < 256; n++) {
            var entry = n;
            for (var bit = 0; bit < 8; bit++) {
                entry = (entry & 1) ? (0xEDB88320 ^ (entry >>> 1)) : (entry >>> 1);
            }
            table[n] = entry >>> 0;
        }
        encodeEmailAddress.crctable = table;
    }
    var crctable = encodeEmailAddress.crctable;

    function _crc32(str) {
        var crc = -1;
        for (var k = 0; k < str.length; ++k) {
            crc = (crc >>> 8) ^ crctable[(crc ^ str.charCodeAt(k)) & 0xff];
        }
        return (crc ^ (-1)) >>> 0;
    }

    addr = "mailto:" + addr;
    var chars = addr.split('');
    var seed = Math.floor(Math.abs(_crc32(addr) / addr.length)); // Deterministic seed.

    for (var i = 0; i < chars.length; i++) {
        var c = chars[i];
        var ord = c.charCodeAt(0);
        // Ignore non-ascii chars.
        if (ord < 128) {
            var r = (seed * (1 + i)) % 100; // Pseudo-random function.
            // roughly 10% raw, 45% hex, 45% dec
            // '@' *must* be encoded. I insist.
            if (r > 90 && c != '@') { /* leave the character as-is */ }
            else if (r < 45) { chars[i] = '&#x' + ord.toString(16) + ';'; }
            else { chars[i] = '&#' + ord.toString(10) + ';'; }
        }
    }

    var href = chars.join('');
    var text = chars.slice(7).join(''); // text without `mailto:`
    return "<a href=\"" + href + "\">" + text + "</a>";
};
|
|
|
|
/**
 * Take the string `str` and parse it into tokens, hashing embedded HTML,
 * escaped characters and handling code spans.
 *
 * @param {string} str  Span-level text.
 * @returns {string} The text with every recognised token replaced by the
 *                   value handleSpanToken() returned for it.
 */
Markdown_Parser.prototype.parseSpan = function(str) {
    var output = '';

    var markup_re = this.no_markup ? '' : (
        '|' +
            '<!--.*?-->' +                  // comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' +         // processing instruction
        '|' +
            '<[/!$]?[-a-zA-Z0-9:_]+' +      // regular tags
            // Optional attributes. Consuming one plain character or one
            // whole quoted string per step keeps the alternatives mutually
            // exclusive, so no atomic group is needed and a `>` inside a
            // quoted value does not end the tag.
            '(?:' +
                '\\s' +
                '(?:[^"\'>]|"[^"]*"|\'[^\']*\')*' +
            ')?' +
            '>'
    );
    var span_re = new RegExp(
        '(' +
            '\\\\' + this.escape_chars_re +
        '|' +
            // PHP uses '(?<![`\\\\])' here; the backslash check is done by
            // hand in the loop below.
            '`+' +                          // code span marker
            markup_re +
        ')'
    );

    while (true) {
        // Each loop iteration searches for either the next tag, the next
        // opening code span marker, or the next escaped character.
        // Each token is then passed to handleSpanToken.
        var found = span_re.exec(str);
        if (!found) {
            output += str;
            break;
        }

        var token = found[0];
        var before = str.slice(0, found.index);
        var after = str.slice(found.index + token.length);
        output += before;

        // A backquote right after a backslash is to be ignored.
        if (token.charAt(0) == "`" && before.charAt(before.length - 1) == "\\") {
            output += token;
            str = after;
            continue;
        }

        var r = this.handleSpanToken(token, after);
        output += r[0];
        str = r[1];
    }
    return output;
};
|
|
|
|
|
|
/**
 * Handle `token` provided by parseSpan by determining its nature and
 * returning the corresponding value that should replace it.
 *
 * @param {string} token  The matched token (escape sequence, run of
 *                        backticks, or markup).
 * @param {string} str    The text following the token.
 * @returns {Array} `[replacement, remainingText]`.
 */
Markdown_Parser.prototype.handleSpanToken = function(token, str) {
    switch (token.charAt(0)) {
        case "\\":
            // Backslash escape: emit the escaped character as a numeric
            // character reference.
            return [this.hashPart("&#" + token.charCodeAt(1) + ";"), str];
        case "`":
            // Search for end marker in remaining text: the same run of
            // backticks, neither preceded nor followed by another backtick.
            var found = str.match(new RegExp('^([\\s\\S]*?[^`])' + this._php_preg_quote(token) + '(?!`)([\\s\\S]*)$', 'm'));
            if (found) {
                var codespan = this.makeCodeSpan(found[1]);
                return [this.hashPart(codespan), found[2]];
            }
            return [token, str]; // return as text since no ending marker found.
        default:
            // Markup: hide it from the other span transformations.
            return [this.hashPart(token), str];
    }
};
|
|
|
|
/**
 * Remove one level of line-leading tabs or spaces.
 *
 * From every line this strips either one leading tab or up to
 * `this.tab_width` leading spaces.
 *
 * @param {string} text  Text to outdent.
 * @returns {string} The outdented text.
 */
Markdown_Parser.prototype.outdent = function(text) {
    var indent_re = new RegExp('^(\\t|[ ]{1,' + this.tab_width + '})', 'mg');
    return text.replace(indent_re, '');
};
|
|
|
|
|
|
//# String length function for detab. `_initDetab` will create a function to
|
|
//# handle UTF-8 if the default function does not exist.
|
|
//var $utf8_strlen = 'mb_strlen';
|
|
|
|
/**
 * Replace tabs with the appropriate amount of space.
 *
 * Each tab advances to the next multiple of `this.tab_width`, measured
 * with String#length (i.e. in UTF-16 code units).
 *
 * @param {string} text  Text that may contain tab characters.
 * @returns {string} The text with every tab expanded to spaces.
 */
Markdown_Parser.prototype.detab = function(text) {
    var self = this;
    // Only lines that actually contain a tab are rebuilt.
    return text.replace(/^.*\t.*$/mg, function(line) {
        // Split the line into the blocks delimited by tab characters, then
        // rebuild it, padding before each following block.
        var blocks = line.split("\t");
        var rebuilt = blocks[0];
        for (var i = 1; i < blocks.length; i++) {
            // Calculate amount of space, insert spaces, insert block.
            var amount = self.tab_width - rebuilt.length % self.tab_width;
            rebuilt += self._php_str_repeat(" ", amount) + blocks[i];
        }
        return rebuilt;
    });
};
|
|
|
|
/**
 * Swap back in all the tags hashed by _HashHTMLBlocks.
 *
 * A hash token is any character, `\x1A`, a run of digits, and the same
 * character again. Tokens without an entry in `this.html_hashes` are left
 * untouched instead of being replaced by the string "undefined".
 *
 * @param {string} text  Text containing hash tokens.
 * @returns {string} Text with known tokens replaced by their stored HTML.
 */
Markdown_Parser.prototype.unhash = function(text) {
    var hashes = this.html_hashes;
    return text.replace(/(.)\x1A[0-9]+\1/g, function(token) {
        return Object.prototype.hasOwnProperty.call(hashes, token) ? hashes[token] : token;
    });
};
|
|
/*-------------------------------------------------------------------------*/
|
|
|
|
/**
 * Constructor function. Initialize the Markdown Extra parser object.
 *
 * Sets the Extra-specific defaults and extends the transformation lists
 * ("gamuts") inherited from Markdown_Parser with the Extra transformations.
 */
function MarkdownExtra_Parser() {

    // Prefix for footnote ids.
    this.fn_id_prefix = "";

    // Optional title attribute for footnote links and backlinks.
    this.fn_link_title = MARKDOWN_FN_LINK_TITLE;
    this.fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

    // Optional class attribute for footnote links and backlinks.
    this.fn_link_class = MARKDOWN_FN_LINK_CLASS;
    this.fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;

    // Predefined abbreviations.
    this.predef_abbr = {};

    // Extra variables used during extra transformations.
    this.footnotes = {};
    this.footnotes_ordered = [];
    this.abbr_desciptions = {};
    this.abbr_word_re = '';

    // Give the current footnote number.
    this.footnote_counter = 1;

    // ### HTML Block Parser ###

    // Tags that are always treated as block tags:
    this.block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

    // Tags treated as block tags only if the opening tag is alone on its line:
    this.context_block_tags_re = 'script|noscript|math|ins|del';

    // Tags where markdown="1" default to span mode:
    this.contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

    // Tags which must not have their contents modified, no matter where
    // they appear:
    this.clean_tags_re = 'script|math';

    // Tags that do not need to be closed.
    this.auto_close_tags_re = 'hr|img';

    // Redefining emphasis markers so that emphasis by underscore does not
    // work in the middle of a word.
    //
    // Each pattern captures (prefix, marker) pairs because the PHP
    // lookbehinds are emulated by consuming the preceding character. For
    // closing markers the PHP condition is `(?<=\S|^)(?<!\*)`: the single
    // character before the marker must be neither whitespace nor the marker
    // character (or the marker is at the start of the text).
    this.em_relist = [
        ['' , '(?:(^|[^\\*])(\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(_)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['*', '([^\\s\\*]|^)(\\*)(?!\\*)'],
        ['_', '([^\\s_]|^)(_)(?![a-zA-Z0-9_])']
    ];
    this.strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(__)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['**', '([^\\s\\*]|^)(\\*\\*)(?!\\*)'],
        ['__', '([^\\s_]|^)(__)(?![a-zA-Z0-9_])']
    ];
    this.em_strong_relist = [
        ['' , '(?:(^|[^\\*])(\\*\\*\\*)(?=[^\\*])|(^|[^a-zA-Z0-9_])(___)(?=[^_]))(?=\\S|$)(?![\\.,:;]\\s)'],
        ['***', '([^\\s\\*]|^)(\\*\\*\\*)(?!\\*)'],
        ['___', '([^\\s_]|^)(___)(?![a-zA-Z0-9_])']
    ];

    // Add extra escapable characters.
    // NOTE(review): in the PHP original the parent constructor runs after
    // this line and rebuilds the escape table; no such call is made here, so
    // confirm that something else derives the table from escape_chars.
    this.escape_chars += ':|';

    // Insert extra document, block, and span transformations.
    // The inherited arrays live on the shared prototype object, so they are
    // copied before being extended; pushing onto them directly would add the
    // Extra entries again for every parser that gets constructed.
    this.document_gamut = this.document_gamut.concat([
        ['doFencedCodeBlocks', 5],
        ['stripFootnotes',     15],
        ['stripAbbreviations', 25],
        ['appendFootnotes',    50]
    ]);

    this.block_gamut = this.block_gamut.concat([
        ['doFencedCodeBlocks', 5],
        ['doTables',           15],
        ['doDefLists',         45]
    ]);

    this.span_gamut = this.span_gamut.concat([
        ['doFootnotes',     5],
        ['doAbbreviations', 70]
    ]);
}
// Inherit from Markdown_Parser. The `constructor` property is deliberately
// left as inherited (Markdown_Parser).
MarkdownExtra_Parser.prototype = new Markdown_Parser();
|
|
|
|
/**
 * Setting up Extra-specific variables.
 *
 * Runs the base parser's setup, resets the footnote and abbreviation state,
 * and registers every abbreviation from `this.predef_abbr`.
 */
MarkdownExtra_Parser.prototype.setup = function() {
    // Call the parent implementation explicitly; going through
    // `this.constructor` only works as long as the constructor property
    // happens to point at Markdown_Parser.
    Markdown_Parser.prototype.setup.call(this);

    this.footnotes = {};
    this.footnotes_ordered = [];
    this.abbr_desciptions = {};
    this.abbr_word_re = '';
    this.footnote_counter = 1;

    var quoted_words = [];
    for (var abbr_word in this.predef_abbr) {
        if (!Object.prototype.hasOwnProperty.call(this.predef_abbr, abbr_word)) {
            continue;
        }
        quoted_words.push(this._php_preg_quote(abbr_word));
        this.abbr_desciptions[abbr_word] = this._php_trim(this.predef_abbr[abbr_word]);
    }
    // Alternation of all predefined abbreviations, as regex source.
    this.abbr_word_re = quoted_words.join('|');
};
|
|
|
|
/**
 * Clearing Extra-specific variables.
 *
 * Drops the footnote and abbreviation state, then runs the base parser's
 * teardown.
 */
MarkdownExtra_Parser.prototype.teardown = function() {
    this.footnotes = {};
    this.footnotes_ordered = [];
    this.abbr_desciptions = {};
    this.abbr_word_re = '';

    // Call the parent implementation explicitly rather than through
    // `this.constructor`, which would recurse if it ever pointed here.
    Markdown_Parser.prototype.teardown.call(this);
};
|
|
|
|
|
|
/**
 * Hashify HTML Blocks and "clean tags".
 *
 * We only want to do this for block-level HTML tags, such as headers,
 * lists, and tables. That's because we still want to wrap <p>s around
 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 * phrase emphasis, and spans. The list of tags we're looking for is
 * hard-coded.
 *
 * This works by calling _hashHTMLBlocks_inMarkdown, which returns a
 * `[processedText, remainingText]` pair; only the processed text is
 * returned from here.
 *
 * @param {string} text  Markdown source text.
 * @returns {string} The text as processed by _hashHTMLBlocks_inMarkdown.
 */
MarkdownExtra_Parser.prototype.hashHTMLBlocks = function(text) {
    // Call the HTML-in-Markdown hasher and keep the processed part.
    var result = this._hashHTMLBlocks_inMarkdown(text);
    return result[0];
};
|
|
|
|
/**
|
|
* Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
|
|
*
|
|
* * $indent is the number of space to be ignored when checking for code
|
|
* blocks. This is important because if we don't take the indent into
|
|
* account, something like this (which looks right) won't work as expected:
|
|
*
|
|
* <div>
|
|
* <div markdown="1">
|
|
* Hello World. <-- Is this a Markdown code block or text?
|
|
* </div> <-- Is this a Markdown code block or a real tag?
|
|
* <div>
|
|
*
|
|
* If you don't like this, just don't indent the tag on which
|
|
* you apply the markdown="1" attribute.
|
|
*
|
|
* * If $enclosing_tag_re is not empty, stops at the first unmatched closing
|
|
* tag with that name. Nested tags supported.
|
|
*
|
|
* * If $span is true, text inside must treated as span. So any double
|
|
* newline will be replaced by a single newline so that it does not create
|
|
* paragraphs.
|
|
*
|
|
* Returns an array of that form: ( processed text , remaining text )
|
|
*/
|
|
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inMarkdown = function(text, indent, enclosing_tag_re, span) {
|
|
if('undefined' === typeof indent) { indent = 0; }
|
|
if('undefined' === typeof enclosing_tag_re) { enclosing_tag_re = ''; }
|
|
if('undefined' === typeof span) { span = false; }
|
|
|
|
if(text === '') { return ['', '']; }
|
|
|
|
var matches;
|
|
|
|
// Regex to check for the presense of newlines around a block tag.
|
|
var newline_before_re = /(?:^\n?|\n\n)*$/;
|
|
var newline_after_re = new RegExp(
|
|
'^' + // Start of text following the tag.
|
|
'([ ]*<!--.*?-->)?' + // Optional comment.
|
|
'[ ]*\\n' , // Must be followed by newline.
|
|
'm'
|
|
);
|
|
|
|
// Regex to match any tag.
|
|
var block_tag_re = new RegExp(
|
|
'(' + // $2: Capture hole tag.
|
|
'</?' + // Any opening or closing tag.
|
|
'(' + // Tag name.
|
|
this.block_tags_re + '|' +
|
|
this.context_block_tags_re + '|' +
|
|
this.clean_tags_re + '|' +
|
|
'(?!\\s)' + enclosing_tag_re +
|
|
')' +
|
|
'(?:' +
|
|
'(?=[\\s"\'/a-zA-Z0-9])' + // Allowed characters after tag name.
|
|
'(?=(' +
|
|
'".*?"|' + // Double quotes (can contain `>`)
|
|
'\'.*?\'|' + // Single quotes (can contain `>`)
|
|
'.+?' + // Anything but quotes and `>`.
|
|
'))\\3*?' +
|
|
')?' +
|
|
'>' + // End of tag.
|
|
'|' +
|
|
'<!--.*?-->' + // HTML Comment
|
|
'|' +
|
|
'<\\?.*?\\?>|<%.*?%>' + // Processing instruction
|
|
'|' +
|
|
'<!\\[CDATA\\[.*?\\]\\]>' + // CData Block
|
|
'|' +
|
|
// Code span marker
|
|
'`+' +
|
|
( !span ? // If not in span.
|
|
'|' +
|
|
// Indented code block
|
|
'(?:^[ ]*\\n|^|\\n[ ]*\\n)' +
|
|
'[ ]{' + (indent + 4) + '}[^\\n]*\\n' +
|
|
'(?=' +
|
|
'(?:[ ]{' + (indent + 4) + '}[^\\n]*|[ ]*)\\n' +
|
|
')*' +
|
|
'|' +
|
|
// Fenced code block marker
|
|
'(?:^|\\n)' +
|
|
'[ ]{0,' + indent + '}~~~+[ ]*\\n'
|
|
: '' ) + // # End (if not is span).
|
|
')',
|
|
'm'
|
|
);
|
|
|
|
var depth = 0; // Current depth inside the tag tree.
|
|
var parsed = ""; // Parsed text that will be returned.
|
|
|
|
//
|
|
// Loop through every tag until we find the closing tag of the parent
|
|
// or loop until reaching the end of text if no parent tag specified.
|
|
//
|
|
do {
|
|
//
|
|
// Split the text using the first $tag_match pattern found.
|
|
// Text before pattern will be first in the array, text after
|
|
// pattern will be at the end, and between will be any catches made
|
|
// by the pattern.
|
|
//
|
|
var parts_available = text.match(block_tag_re); //PREG_SPLIT_DELIM_CAPTURE
|
|
var parts;
|
|
if(!parts_available) {
|
|
parts = [text];
|
|
}
|
|
else {
|
|
parts = [RegExp.leftContext, RegExp.lastMatch, RegExp.rightContext];
|
|
}
|
|
|
|
// If in Markdown span mode, add a empty-string span-level hash
|
|
// after each newline to prevent triggering any block element.
|
|
if(span) {
|
|
var _void = this.hashPart("", ':');
|
|
var newline = _void + "\n";
|
|
parts[0] = _void + parts[0].replace(/\n/g, newline) + _void;
|
|
}
|
|
|
|
parsed += parts[0]; // Text before current tag.
|
|
|
|
// If end of $text has been reached. Stop loop.
|
|
if(!parts_available) {
|
|
text = "";
|
|
break;
|
|
}
|
|
|
|
var tag = parts[1]; // Tag to handle.
|
|
text = parts[2]; // Remaining text after current tag.
|
|
var tag_re = this._php_preg_quote(tag); // For use in a regular expression.
|
|
|
|
var t;
|
|
var block_text;
|
|
//
|
|
// Check for: Code span marker
|
|
//
|
|
|
|
if (tag.charAt(0) == "`") {
|
|
// Find corresponding end marker.
|
|
tag_re = this._php_preg_quote(tag);
|
|
if (matches = text.match(new RegExp('^((?=(.+?|\\n[^\\n])))/1*?[^`]' + tag_re + '[^`]'))) {
|
|
// End marker found: pass text unchanged until marker.
|
|
parsed += tag + matches[0];
|
|
text = text.substr(matches[0].length);
|
|
}
|
|
else {
|
|
// Unmatched marker: just skip it.
|
|
parsed += tag;
|
|
}
|
|
}
|
|
//
|
|
// Check for: Fenced code block marker.
|
|
//
|
|
else if(tag.match(new RegExp('^\\n?[ ]{0,' + (indent + 3) + '}~'))) {
|
|
// Fenced code block marker: find matching end marker.
|
|
tag_re = this._php_preg_quote(this._php_trim(tag));
|
|
if(matches = text.match(new RegExp('^(?:.*\\n)+?[ ]{0,' + indent + '}' + tag_re + '[ ]*\\n'))) {
|
|
// End marker found: pass text unchanged until marker.
|
|
parsed += tag + matches[0];
|
|
text = text.substr(matches[0].length);
|
|
}
|
|
else {
|
|
// No end marker: just skip it.
|
|
parsed += tag;
|
|
}
|
|
}
|
|
//
|
|
// Check for: Indented code block.
|
|
//
|
|
else if(tag.charAt(0) == "\n" || tag.charAt(0) == " ") {
|
|
// Indented code block: pass it unchanged, will be handled
|
|
// later.
|
|
parsed += tag;
|
|
}
|
|
//
|
|
// Check for: Opening Block level tag or
|
|
// Opening Context Block tag (like ins and del)
|
|
// used as a block tag (tag is alone on it's line).
|
|
//
|
|
else if (tag.match(new RegExp('^<(?:' + this.block_tags_re + ')\\b')) ||
|
|
(
|
|
tag.match(new RegExp('^<(?:' + this.context_block_tags_re + ')\\b')) &&
|
|
parsed.match(newline_before_re) &&
|
|
text.match(newline_after_re)
|
|
)
|
|
) {
|
|
// Need to parse tag and following text using the HTML parser.
|
|
t = this._hashHTMLBlocks_inHTML(tag + text, this.hashBlock, true);
|
|
block_text = t[0];
|
|
text = t[1];
|
|
|
|
// Make sure it stays outside of any paragraph by adding newlines.
|
|
parsed += "\n\n" + block_text + "\n\n";
|
|
}
|
|
//
|
|
// Check for: Clean tag (like script, math)
|
|
// HTML Comments, processing instructions.
|
|
//
|
|
else if(
|
|
tag.match(new RegExp('^<(?:' + this.clean_tags_re + ')\\b')) ||
|
|
tag.charAt(1) == '!' || tag.charAt(1) == '?'
|
|
) {
|
|
// Need to parse tag and following text using the HTML parser.
|
|
// (don't check for markdown attribute)
|
|
t = this._hashHTMLBlocks_inHTML(tag + text, this.hashClean, false);
|
|
block_text = t[0];
|
|
text = t[1];
|
|
|
|
parsed += block_text;
|
|
}
|
|
//
|
|
// Check for: Tag with same name as enclosing tag.
|
|
//
|
|
else if (enclosing_tag_re !== '' &&
|
|
// Same name as enclosing tag.
|
|
tag.match(new RegExp('^</?(?:' + enclosing_tag_re + ')\\b'))
|
|
) {
|
|
//
|
|
// Increase/decrease nested tag count.
|
|
//
|
|
if (tag.charAt(1) == '/') depth--;
|
|
else if (tag.charAt(tag.length - 2) != '/') depth++;
|
|
|
|
if(depth < 0) {
|
|
//
|
|
// Going out of parent element. Clean up and break so we
|
|
// return to the calling function.
|
|
//
|
|
text = tag + text;
|
|
break;
|
|
}
|
|
|
|
parsed += tag;
|
|
}
|
|
else {
|
|
parsed += tag;
|
|
}
|
|
} while(depth >= 0);
|
|
|
|
return [parsed, text];
|
|
};
|
|
|
|
/**
 * Parse a run of raw HTML that begins with an opening tag, stopping once
 * that first tag has been closed.
 *
 * - Raw HTML collected along the way is converted with `hash_method`,
 *   which is invoked with the parser as `this`.
 * - When `md_attr` is true, a tag carrying a `markdown` attribute whose
 *   value matches `1`, `block` or `span` has the attribute removed and its
 *   content is handed to `_hashHTMLBlocks_inMarkdown`.
 * - If the text runs out before the first tag is balanced, only the first
 *   character is reported as processed so the caller always makes progress.
 *
 * @param {string} text Text starting at the tag to parse.
 * @param {function(string): string} hash_method Method used to hash raw
 *     HTML chunks (callers pass e.g. `this.hashBlock` or `this.hashClean`).
 * @param {boolean} md_attr Whether the `markdown` attribute is honoured.
 * @returns {Array} Two-element array: [processed text, remaining text].
 */
MarkdownExtra_Parser.prototype._hashHTMLBlocks_inHTML = function(text, hash_method, md_attr) {
    if (text === '') { return ['', '']; }

    var matches;

    // Regex to match the `markdown` attribute inside of a tag.
    var markdown_attr_re = new RegExp(
        '\\s*' +                // Eat whitespace before the `markdown` attribute
        'markdown' +
        '\\s*=\\s*' +
        '(?:' +
            '(["\'])' +         // $1: quote delimiter
            '(.*?)' +           // $2: attribute value
            '\\1' +             // matching delimiter
        '|' +
            '([^\\s>]*)' +      // $3: unquoted attribute value
        ')' +
        '()'                    // $4: empty group kept from the PHP original
    );

    // Regex to match any tag.
    // `(?=(X))\2` emulates an atomic group: the lookahead captures X and the
    // back-reference then consumes exactly that capture. The back-reference
    // must name the lookahead's own group (group 2 in this pattern); a
    // reference to a non-existent group is read as an octal escape instead.
    var tag_re = new RegExp(
        '(' +                                   // $1: Capture whole tag.
            '</?' +                             // Any opening or closing tag.
                '[\\w:$]+' +                    // Tag name.
                '(?:' +
                    '(?=[\\s"\'/a-zA-Z0-9])' +  // Allowed characters after tag name.
                    '(?:' +
                        '(?=(' +
                            '".*?"|' +          // Double quotes (can contain `>`)
                            '\'.*?\'|' +        // Single quotes (can contain `>`)
                            '.+?' +             // Anything but quotes and `>`.
                        '))\\2' +
                    ')*?' +
                ')?' +
            '>' +                               // End of tag.
        '|' +
            '<!--.*?-->' +                      // HTML Comment
        '|' +
            '<\\?.*?\\?>|<%.*?%>' +             // Processing instruction
        '|' +
            '<!\\[CDATA\\[.*?\\]\\]>' +         // CData Block
        ')'
    );

    var original_text = text;   // Save original text in case of failure.

    var depth = 0;              // Current depth inside the tag tree.
    var block_text = "";        // Temporary text holder for current text.
    var parsed = "";            // Parsed text that will be returned.

    //
    // Get the name of the starting tag.
    // (The character class keeps the name usable inside a regex without quoting.)
    //
    var base_tag_name_re = "";
    if ((matches = text.match(/^<([\w:$]*)\b/))) {
        base_tag_name_re = matches[1];
    }

    // These patterns do not change while looping, so build them once.
    var auto_close_re = new RegExp('^</?(?:' + this.auto_close_tags_re + ')\\b');
    var base_tag_re = new RegExp('^</?' + base_tag_name_re + '\\b');

    //
    // Loop through every tag until we find the corresponding closing tag.
    //
    do {
        //
        // Split the text around the first tag found: text before the tag,
        // the tag itself, and the text after it.
        //
        var tag_match = text.match(tag_re);

        if (!tag_match) {
            //
            // End of text reached with unbalanced tag(s).
            // Return the original text unchanged and pass only the first
            // character as filtered to prevent an infinite loop in the
            // parent function.
            //
            return [original_text.charAt(0), original_text.substr(1)];
        }

        var tag = tag_match[0];                             // Tag to handle.
        block_text += text.substr(0, tag_match.index);      // Text before current tag.
        text = text.substr(tag_match.index + tag.length);   // Remaining text after current tag.

        //
        // Check for: Auto-close tag (like <hr/>),
        //            comments and processing instructions.
        //
        if (auto_close_re.test(tag) || tag.charAt(1) == '!' || tag.charAt(1) == '?') {
            // Just add the tag to the block as if it was text.
            block_text += tag;
            continue;
        }

        //
        // Increase/decrease nested tag count. Only do so if
        // the tag's name matches the base tag's.
        //
        if (base_tag_re.test(tag)) {
            if (tag.charAt(1) == '/') { depth--; }
            else if (tag.charAt(tag.length - 2) != '/') { depth++; }
        }

        //
        // Check for the `markdown` attribute and handle it.
        // Only one of the quoted ($2) / unquoted ($3) groups participates in
        // a match; the other one is undefined and must not be concatenated
        // as the string "undefined".
        //
        var attr_m = md_attr ? tag.match(markdown_attr_re) : null;
        var attr_value = attr_m ? (attr_m[2] || '') + (attr_m[3] || '') : '';

        if (!attr_m || !/^1|block|span$/.test(attr_value)) {
            block_text += tag;
            continue;
        }

        // Remove `markdown` attribute from opening tag.
        tag = tag.replace(markdown_attr_re, '');

        // Check if text inside this tag must be parsed in span mode.
        this.mode = attr_value;
        var span_mode = this.mode == 'span' || (this.mode != 'block' &&
            new RegExp('^<(?:' + this.contain_span_tags_re + ')\\b').test(tag));

        // Calculate indent before tag (leading spaces of the last line
        // collected so far).
        var indent = 0;
        if ((matches = block_text.match(/(?:^|\n)( *?)(?! ).*?$/))) {
            indent = matches[1].length;
        }

        // End preceding block with this tag.
        block_text += tag;
        parsed += hash_method.call(this, block_text);

        // Get enclosing tag name for the Markdown-side parser.
        matches = tag.match(/^<([\w:$]*)\b/);
        var tag_name_re = matches ? matches[1] : '';

        // Parse the content using the HTML-in-Markdown parser.
        var t = this._hashHTMLBlocks_inMarkdown(text, indent, tag_name_re, span_mode);
        block_text = t[0];
        text = t[1];

        // Outdent markdown text: strip up to `indent` leading spaces from
        // every line.
        if (indent > 0) {
            block_text = block_text.replace(new RegExp('^[ ]{1,' + indent + '}', 'gm'), "");
        }

        // Append tag content to parsed text.
        if (!span_mode) { parsed += "\n\n" + block_text + "\n\n"; }
        else { parsed += block_text; }

        // Start over a new block.
        block_text = "";

    } while (depth > 0);

    //
    // Hash last block text that wasn't processed inside the loop.
    //
    parsed += hash_method.call(this, block_text);

    return [parsed, text];
};
|
|
|
|
|
|
/**
 * Hash a "clean" tag.
 *
 * Delegates to `hashPart` using the boundary letter 'C', so that clean
 * tags inserted into the text are escaped and cannot take part in invalid
 * nested overlaps.
 *
 * @param {string} text Text to hash.
 * @returns {*} Whatever `hashPart` returns for the 'C' boundary.
 */
MarkdownExtra_Parser.prototype.hashClean = function(text) {
    var clean_boundary = 'C';
    return this.hashPart(text, clean_boundary);
};
|
|
|
|
|
|
/**
 * Convert headers to hashed <h1>..<h6> blocks, with id attribute support.
 *
 * Setext-style headers:
 *     Header 1  {#header1}
 *     ========
 *
 *     Header 2  {#header2}
 *     --------
 *
 * atx-style headers:
 *     # Header 1        {#header1}
 *     ## Header 2       {#header2}
 *     ## Header 2 with closing hashes ##  {#header3}
 *     ...
 *     ###### Header 6   {#header2}
 *
 * The optional `{#id}` suffix becomes the `id` attribute of the header.
 *
 * @param {string} text Text to scan for headers.
 * @returns {string} Text with each header replaced by its hashed block.
 */
MarkdownExtra_Parser.prototype.doHeaders = function(text) {
    var parser = this;

    // $1: header text, $2: id attribute, $3: header footer
    var setext_re = /(^.+?)(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?[ ]*\n(=+|-+)[ ]*\n+/mg;

    // $1: string of #'s, $2: header text, $3: id attribute
    // (optional closing #'s are matched but not counted)
    var atx_re = /^(\#{1,6})[ ]*(.+?)[ ]*\#*(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?\n+/mg;

    // Build the ` id="..."` attribute, or nothing when no id was given.
    var idAttribute = function(id) {
        if ('undefined' === typeof id || id == "") { return ""; }
        return " id=\"" + id + "\"";
    };

    // Render one header of the given level and wrap it as a hashed block.
    var renderHeader = function(level, id, content) {
        var open_tag = "<h" + level + idAttribute(id) + ">";
        var close_tag = "</h" + level + ">";
        var html = open_tag + parser.runSpanGamut(content) + close_tag;
        return "\n" + parser.hashBlock(html) + "\n\n";
    };

    var with_setext = text.replace(setext_re, function(whole, content, id, underline) {
        // A single dash under a line starting with "- " is left untouched.
        if (underline == '-' && content.match(/^- /)) {
            return whole;
        }
        return renderHeader(underline.charAt(0) == '=' ? 1 : 2, id, content);
    });

    return with_setext.replace(atx_re, function(whole, hashes, content, id) {
        return renderHeader(hashes.length, id, content);
    });
};
|
|
|
|
/**
 * Form HTML tables.
 *
 * Two syntaxes are recognised, with and without a leading pipe:
 *
 *     | Header 1 | Header 2
 *     | -------- | --------
 *     | Cell 1   | Cell 2
 *
 *     Header 1 | Header 2
 *     -------- | --------
 *     Cell 1   | Cell 2
 *
 * Colons in the underline row select the column alignment (`:--` left,
 * `--:` right, `:-:` center). Each table is rendered as <table> markup and
 * passed through `hashBlock`.
 *
 * @param {string} text Text to scan for tables.
 * @returns {string} Text with each table replaced by its hashed block.
 */
MarkdownExtra_Parser.prototype.doTables = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // Render one table from its header row, underline row and body rows.
    var _doTable_callback = function(match, head, underline, content) {
        // Remove any trailing pipes on each line. `content` holds several
        // lines, so the replacement has to be global.
        head = head.replace(/[|] *$/gm, '');
        underline = underline.replace(/[|] *$/gm, '');
        content = content.replace(/[|] *$/gm, '');

        var attr = [];

        // Reading alignment from header underline.
        var separators = underline.split(/[ ]*[|][ ]*/);
        var n;
        for (n = 0; n < separators.length; n++) {
            var s = separators[n];
            if (s.match(/^ *-+: *$/)) { attr[n] = ' align="right"'; }
            else if (s.match(/^ *:-+: *$/)) { attr[n] = ' align="center"'; }
            else if (s.match(/^ *:-+ *$/)) { attr[n] = ' align="left"'; }
            else { attr[n] = ''; }
        }

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those get ignored.
        head = self.parseSpan(head);
        var headers = head.split(/ *[|] */);
        var col_count = headers.length;

        // Write column headers. The header row may have more columns than
        // the underline row; those columns get no alignment attribute.
        var text = "<table>\n";
        text += "<thead>\n";
        text += "<tr>\n";
        for (n = 0; n < headers.length; n++) {
            var header = headers[n];
            text += " <th" + (attr[n] || '') + ">" + self.runSpanGamut(self._php_trim(header)) + "</th>\n";
        }
        text += "</tr>\n";
        text += "</thead>\n";

        // Split content by row.
        var rows = self._php_trim(content, "\n").split("\n");

        text += "<tbody>\n";
        for (var i = 0; i < rows.length; i++) {
            // Parsing span elements, including code spans, character escapes,
            // and inline HTML tags, so that pipes inside those get ignored.
            var row = self.parseSpan(rows[i]);

            // Split row by cell; pad short rows with empty cells.
            var row_cells = row.split(/ *[|] */, col_count);
            while (row_cells.length < col_count) { row_cells.push(''); }

            text += "<tr>\n";
            for (n = 0; n < row_cells.length; n++) {
                var cell = row_cells[n];
                text += " <td" + (attr[n] || '') + ">" + self.runSpanGamut(self._php_trim(cell)) + "</td>\n";
            }
            text += "</tr>\n";
        }
        text += "</tbody>\n";
        text += "</table>";

        return self.hashBlock(text) + "\n";
    };

    text = this.__wrapSTXETX__(text);

    //
    // Find tables with leading pipe.
    //
    //    | Header 1 | Header 2
    //    | -------- | --------
    //    | Cell 1   | Cell 2
    //    | Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                       // Start of a line
        '[ ]{0,' + less_than_tab + '}' +            // Allowed whitespace.
        '[|]' +                                     // Optional leading pipe (present)
        '(.+)\\n' +                                 // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +            // Allowed whitespace.
        '[|]([ ]*[-:]+[-| :]*)\\n' +                // $2: Header underline

        '(' +                                       // $3: Cells
            '(?:' +
                '[ ]*' +                            // Allowed whitespace.
                '[|].*\\n' +                        // Row content.
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                            // Stop at final double newline.
        'mg'
    ), function(match, head, underline, content) {
        // Remove the leading pipe of every row, not just the first one.
        content = content.replace(/^ *[|]/gm, '');

        return _doTable_callback(match, head, underline, content);
    });

    //
    // Find tables without leading pipe.
    //
    //    Header 1 | Header 2
    //    -------- | --------
    //    Cell 1   | Cell 2
    //    Cell 3   | Cell 4
    //
    text = text.replace(new RegExp(
        '^' +                                       // Start of a line
        '[ ]{0,' + less_than_tab + '}' +            // Allowed whitespace.
        '(\\S.*[|].*)\\n' +                         // $1: Header row (at least one pipe)

        '[ ]{0,' + less_than_tab + '}' +            // Allowed whitespace.
        '([-:]+[ ]*[|][-| :]*)\\n' +                // $2: Header underline

        '(' +                                       // $3: Cells
            '(?:' +
                '.*[|].*\\n' +                      // Row content
            ')*' +
        ')' +
        '(?=\\n|\\x03)',                            // Stop at final double newline.
        'mg'
    ), _doTable_callback);

    text = this.__unwrapSTXETX__(text);

    return text;
};
|
|
|
|
/**
 * Form HTML definition lists.
 *
 * Finds every whole definition list (one or more term lines followed by a
 * `: definition` line), renders its items with `processDefListItems`,
 * wraps the result in <dl>...</dl> and replaces it with a hashed block.
 *
 * @param {string} text Text to scan for definition lists.
 * @returns {string} Text with each definition list replaced by its hashed block.
 */
MarkdownExtra_Parser.prototype.doDefLists = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // Re-usable pattern to match any entire dl list:
    var whole_list_re = '(?:' +
        '(' +                                           // whole list
            '(' +
                '[ ]{0,' + less_than_tab + '}' +
                '((?:[ \\t]*\\S.*\\n)+)' +              // defined term
                                                        // [porting note] Original regex from PHP is
                                                        // (?>.*\S.*\n), which matches a line with at
                                                        // least one non-space character. Changing the
                                                        // first .* to [ \t]* stops unnecessary
                                                        // backtracking and hence improves performance.
                '\\n?' +
                '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
            ')' +
            '([\\s\\S]+?)' +
            '(' +
                '(?=\\x03)' +                           // \z: the end-of-text sentinel
            '|' +
                '(?=' +                                 // [porting note] Our regex will consume leading
                                                        // newline characters so we will leave the newlines
                                                        // here for the next definition
                    '\\n{2,}' +
                    '(?=\\S)' +
                    '(?!' +                             // Negative lookahead for another term
                        '[ ]{0,' + less_than_tab + '}' +
                        '(?:\\S.*\\n)+?' +              // defined term
                        '\\n?' +
                        '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
                    ')' +
                    '(?!' +                             // Negative lookahead for another definition
                        '[ ]{0,' + less_than_tab + '}:[ ]+' +   // colon starting definition
                    ')' +
                ')' +
            ')' +
        ')' +
    ')'; // mx

    text = this.__wrapSTXETX__(text);
    text = text.replace(new RegExp(
        '(\\x02\\n?|\\n\\n)' +
        whole_list_re, 'mg'
    ), function(match, pre, list) {
        // [porting note] `pre` is captured and re-emitted so that the
        // newlines preceding the list are preserved.
        var result = self._php_trim(self.processDefListItems(list));
        result = "<dl>\n" + result + "\n</dl>";
        return pre + self.hashBlock(result) + "\n\n";
    });
    text = this.__unwrapSTXETX__(text);

    return text;
};
|
|
|
|
/**
 * Process the contents of a single definition list, splitting it
 * into individual term (<dt>) and definition (<dd>) items.
 *
 * A definition preceded by a blank line, or containing one, is rendered
 * through `runBlockGamut`; any other definition goes through
 * `runSpanGamut`.
 *
 * @param {string} list_str Raw text of one definition list.
 * @returns {string} The <dt>/<dd> markup for the list.
 */
MarkdownExtra_Parser.prototype.processDefListItems = function(list_str) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    list_str = this.__wrapSTXETX__(list_str);

    // Trim trailing blank lines (those directly before the \x03 end sentinel):
    list_str = list_str.replace(/\n{2,}(?=\x03)/, "\n");

    // Process definition terms.
    list_str = list_str.replace(new RegExp(
        '(\\x02\\n?|\\n\\n+)' +                     // leading line
        '(' +                                       // definition terms
            '[ ]{0,' + less_than_tab + '}' +        // leading whitespace
            '(?![:][ ]|[ ])' +                      // negative lookahead for a definition
                                                    //   mark (colon) or more whitespace.
            '(?:\\S.*\\n)+?' +                      // actual term (not whitespace).
        ')' +
        '(?=\\n?[ ]{0,3}:[ ])',                     // lookahead for following line feed
                                                    //   with a definition mark.
        'mg'
    ), function(match, pre, terms_str) {
        var terms = self._php_trim(terms_str).split("\n");
        var text = '';
        for (var i = 0; i < terms.length; i++) {
            var term = self.runSpanGamut(self._php_trim(terms[i]));
            text += "\n<dt>" + term + "</dt>";
        }
        return text + "\n";
    });

    // Process actual definitions.
    list_str = list_str.replace(new RegExp(
        '\\n(\\n+)?' +                              // leading line = $1
        '(' +                                       // marker space = $2
            '[ ]{0,' + less_than_tab + '}' +        // whitespace before colon
            '[:][ ]+' +                             // definition mark (colon)
        ')' +
        '([\\s\\S]+?)' +                            // definition text = $3
                                                    // [porting note] Maybe no trailing
                                                    // newlines in our version, changed the
                                                    // following line from \n+ to \n*.
        '(?=\\n*' +                                 // stop at next definition mark,
            '(?:' +                                 // next term or end of text
                '\\n[ ]{0,' + less_than_tab + '}[:][ ]|' +  // [porting note] do not match
                                                            // colon in the middle of a line
                '<dt>|\\x03' +                      // \z
            ')' +
        ')',
        'mg'
    ), function(match, leading_line, marker_space, def) {
        if (leading_line || def.match(/\n{2,}/)) {
            // Replace marker with the appropriate whitespace indentation
            def = self._php_str_repeat(' ', marker_space.length) + def;
            def = self.runBlockGamut(self.outdent(def + "\n\n"));
            def = "\n" + def + "\n";
        }
        else {
            def = self._php_rtrim(def);
            def = self.runSpanGamut(self.outdent(def));
        }

        return "\n<dd>" + def + "</dd>\n";
    });

    list_str = this.__unwrapSTXETX__(list_str);

    return list_str;
};
|
|
|
|
/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 *     ~~~
 *     Code block
 *     ~~~
 *
 * The block content is HTML-escaped, leading blank lines are turned into
 * <br> elements, and the resulting <pre><code> markup is replaced by a
 * hashed block.
 *
 * @param {string} text Text to scan for fenced code blocks.
 * @returns {string} Text with each fenced block replaced by its hashed block.
 */
MarkdownExtra_Parser.prototype.doFencedCodeBlocks = function(text) {
    var parser = this;

    var fence_re = new RegExp([
        '(?:\\n|\\x02)',
        // 1: Opening marker (three tildes or more).
        '(', '~{3,}', ')',
        // Whitespace and newline following marker.
        '[ ]*\\n',
        // 2: Content; every line must not be a closing marker.
        '(',
            '(?:',
                '(?=(',
                    '(?!\\1[ ]*\\n)',
                    '.*\\n+',
                '))\\3',
            ')+',
        ')',
        // Closing marker.
        '\\1[ ]*\\n'
    ].join(''), "mg");

    // Render one fenced block as a hashed <pre><code> element.
    var renderFence = function(whole, marker, code) {
        var escaped = parser._php_htmlspecialchars_ENT_NOQUOTES(code);
        var with_breaks = escaped.replace(/^\n+/, function(newlines) {
            return parser._php_str_repeat("<br" + parser.empty_element_suffix, newlines.length);
        });
        var html = "<pre><code>" + with_breaks + "</code></pre>";
        return "\n\n" + parser.hashBlock(html) + "\n\n";
    };

    var wrapped = this.__wrapSTXETX__(text);
    return this.__unwrapSTXETX__(wrapped.replace(fence_re, renderFence));
};
|
|
|
|
/**
 * Wrap paragraphs of text in <p> tags and unhash the result.
 *
 * The text is split on runs of two or more newlines. Each non-empty piece
 * is run through the span gamut and trimmed; pieces that are block-tag or
 * clean-tag hashes are left alone, all others are wrapped in <p>...</p>.
 *
 * @param {string} text String to process with HTML <p> tags.
 * @returns {string} The joined paragraphs, passed through `unhash`.
 */
MarkdownExtra_Parser.prototype.formParagraphs = function(text) {
    // Strip leading and trailing lines:
    var stripped = this.__unwrapSTXETX__(
        this.__wrapSTXETX__(text).replace(/(?:\x02)\n+|\n+(?:\x03)/g, "")
    );

    var grafs = stripped.split(/\n{2,}/m);

    // Matches a block tag hash or a clean tag hash.
    var hashed_block_re = /^B\x1A[0-9]+B|^C\x1A[0-9]+C$/;

    //
    // Wrap <p> tags around everything that is not a hashed block.
    //
    for (var idx = 0, total = grafs.length; idx < total; idx++) {
        // [porting note] Skipping empty pieces stands in for PHP's
        // PREG_SPLIT_NO_EMPTY.
        if (grafs[idx] == "") {
            continue;
        }

        var rendered = this._php_trim(this.runSpanGamut(grafs[idx]));
        grafs[idx] = hashed_block_re.test(rendered)
            ? rendered
            : "<p>" + rendered + "</p>";
    }

    // Join grafs in one text, then remove any tag hashes still present.
    return this.unhash(grafs.join("\n\n"));
};
|
|
|
|
// ### Footnotes
|
|
|
|
/**
 * Strip footnote definitions from text and store their content in
 * `this.footnotes`.
 *
 * Footnote definitions have the form `[^id]: text`. Each one is stored
 * under the key `fn_id_prefix + id` after being outdented, and is removed
 * from the text.
 *
 * @param {string} text Text to scan for footnote definitions.
 * @returns {string} Text with the footnote definitions removed.
 */
MarkdownExtra_Parser.prototype.stripFootnotes = function(text) {
    var parser = this;
    var max_indent = this.tab_width - 1;

    var definition_re = new RegExp([
        '^[ ]{0,' + max_indent + '}\\[\\^(.+?)\\][ ]?:',   // note_id = $1
        '[ ]*',
        '\\n?',                         // maybe *one* newline
        '(',                            // text = $2 (no blank lines allowed)
            '(?:',
                '.+',                   // actual text
            '|',
                '\\n',                  // newlines but
                '(?!\\[\\^.+?\\]:\\s)', // negative lookahead for footnote marker.
                '(?!\\n+[ ]{0,3}\\S)',  // ensure line is not blank and followed
                                        // by non-indented content
            ')*',
        ')'
    ].join(''), "mg");

    return text.replace(definition_re, function(whole, id, body) {
        parser.footnotes[parser.fn_id_prefix + id] = parser.outdent(body);
        // The whole definition is removed from the text.
        return '';
    });
};
|
|
|
|
/**
 * Replace footnote references `[^id]` in the text with a special
 * text-token, which is later replaced by the actual footnote marker in
 * appendFootnotes. Nothing is replaced while `this.in_anchor` is set.
 *
 * @param {string} text Text to scan for footnote references.
 * @returns {string} Text with references replaced by tokens.
 */
MarkdownExtra_Parser.prototype.doFootnotes = function(text) {
    if (this.in_anchor) {
        return text;
    }
    return text.replace(/\[\^(.+?)\]/g, "F\x1Afn:$1\x1A:");
};
|
|
|
|
/**
 * Replace footnote tokens with their markers and append the footnote list
 * to the text.
 *
 * Every footnote token whose id has a stored definition becomes a
 * <sup><a>…</a></sup> marker, numbered from `this.footnote_counter`; its
 * definition moves from `this.footnotes` to `this.footnotes_ordered`, so
 * each footnote is used by one marker only. Tokens without a definition
 * are turned back into `[^id]`. When at least one footnote was used, a
 * <div class="footnotes"> list with a backlink per footnote is appended.
 *
 * The sequence `%%` in the link/backlink class and title settings is
 * replaced by the footnote number.
 *
 * @param {string} text Text containing footnote tokens.
 * @returns {string} Text with markers and the footnote list appended.
 */
MarkdownExtra_Parser.prototype.appendFootnotes = function(text) {
    var self = this;

    // `in` would also accept inherited keys such as "constructor", so an
    // own-property check is used to decide whether a footnote is defined.
    var has_own = Object.prototype.hasOwnProperty;

    var _appendFootnotes_callback = function(match, m1) {
        var node_id = self.fn_id_prefix + m1;

        // Create footnote marker only if it has a corresponding footnote *and*
        // the footnote hasn't been used by another marker.
        if (has_own.call(self.footnotes, node_id)) {
            // Transfer footnote content to the ordered list.
            self.footnotes_ordered.push([node_id, self.footnotes[node_id]]);
            delete self.footnotes[node_id];

            var num = self.footnote_counter++;
            var attr = " rel=\"footnote\"";
            if (self.fn_link_class != "") {
                var classname = self.fn_link_class;
                classname = self.encodeAttribute(classname);
                attr += " class=\"" + classname + "\"";
            }
            if (self.fn_link_title != "") {
                var title = self.fn_link_title;
                title = self.encodeAttribute(title);
                attr += " title=\"" + title + "\"";
            }

            attr = attr.replace(/%%/g, String(num));
            node_id = self.encodeAttribute(node_id);

            return "<sup id=\"fnref:" + node_id + "\">" +
                "<a href=\"#fn:" + node_id + "\"" + attr + ">" + num + "</a>" +
                "</sup>";
        }

        return "[^" + m1 + "]";
    };

    text = text.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);

    if (this.footnotes_ordered.length > 0) {
        text += "\n\n";
        text += "<div class=\"footnotes\">\n";
        text += "<hr" + this.empty_element_suffix + "\n";
        text += "<ol>\n\n";

        // Attribute template shared by all backlinks; `%%` is filled in
        // per footnote below.
        var attr = " rev=\"footnote\"";
        if (this.fn_backlink_class != "") {
            var classname = this.fn_backlink_class;
            classname = this.encodeAttribute(classname);
            attr += " class=\"" + classname + "\"";
        }
        if (this.fn_backlink_title != "") {
            var title = this.fn_backlink_title;
            title = this.encodeAttribute(title);
            attr += " title=\"" + title + "\"";
        }
        var num = 0;

        // Footnotes referenced from inside other footnotes are pushed onto
        // the list by the callback while this loop is running.
        while (this.footnotes_ordered.length > 0) {
            var head = this.footnotes_ordered.shift();
            var note_id = head[0];
            var footnote = head[1];

            footnote += "\n"; // Need to append newline before parsing.
            footnote = this.runBlockGamut(footnote + "\n");
            footnote = footnote.replace(/F\x1Afn:(.*?)\x1A:/g, _appendFootnotes_callback);

            // Fill the placeholder into a copy, keeping the template intact
            // so that later footnotes get their own number.
            var backlink_attr = attr.replace(/%%/g, String(++num));
            note_id = this.encodeAttribute(note_id);

            // Add backlink to last paragraph; create new paragraph if needed.
            var backlink = "<a href=\"#fnref:" + note_id + "\"" + backlink_attr + ">↩</a>";
            if (footnote.match(/<\/p>$/)) {
                footnote = footnote.substr(0, footnote.length - 4) + " " + backlink + "</p>";
            } else {
                footnote += "\n\n<p>" + backlink + "</p>";
            }

            text += "<li id=\"fn:" + note_id + "\">\n";
            text += footnote + "\n";
            text += "</li>\n\n";
        }

        text += "</ol>\n";
        text += "</div>";
    }
    return text;
};
|
|
|
|
//### Abbreviations ###
|
|
|
|
/**
 * Strip abbreviation definitions from text and register them.
 *
 * Abbreviation definitions have the form `*[word]: description`. For every
 * definition found, the quoted word is appended to `this.abbr_word_re`
 * (alternatives separated by `|`), the trimmed description is stored in
 * `this.abbr_desciptions[word]`, and the definition is removed from the
 * text.
 *
 * @param {string} text Text to scan for abbreviation definitions.
 * @returns {string} Text with the abbreviation definitions removed.
 */
MarkdownExtra_Parser.prototype.stripAbbreviations = function(text) {
    var self = this;

    var less_than_tab = this.tab_width - 1;

    // The `g` flag is required so that every definition is processed,
    // not only the first one.
    text = text.replace(new RegExp(
        '^[ ]{0,' + less_than_tab + '}\\*\\[(.+?)\\][ ]?:' +   // abbr_id = $1
        '(.*)',                                                 // text = $2 (no blank lines allowed)
        "mg"
    ), function(match, abbr_word, abbr_desc) {
        if (self.abbr_word_re != '') {
            self.abbr_word_re += '|';
        }
        self.abbr_word_re += self._php_preg_quote(abbr_word);
        self.abbr_desciptions[abbr_word] = self._php_trim(abbr_desc);
        return ''; // String that will replace the block
    });
    return text;
};
|
|
|
|
/**
 * Find defined abbreviations in text and wrap them in <abbr> elements.
 *
 * An abbreviation is only matched as a whole word, i.e. when it is neither
 * preceded nor followed by a word character or the \x1A hash marker. The
 * description stored in `this.abbr_desciptions`, when not empty, becomes
 * the `title` attribute. Each <abbr> element is passed through `hashPart`.
 *
 * @param {string} text Text to scan for abbreviations.
 * @returns {string} Text with every abbreviation wrapped.
 */
MarkdownExtra_Parser.prototype.doAbbreviations = function(text) {
    var self = this;

    if (!this.abbr_word_re) {
        return text;
    }

    var has_own = Object.prototype.hasOwnProperty;

    // `abbr_word_re` may contain significant spaces, so the pattern is
    // assembled without any free-spacing.
    // The first group stands in for a negative lookbehind: it consumes the
    // character before the abbreviation, so that character (`prev`) has to
    // be put back in front of the replacement.
    return text.replace(new RegExp(
        '(^|[^\\w\\x1A])' +
        '(' + this.abbr_word_re + ')' +
        '(?![\\w\\x1A])',
        'g'
    ), function(match, prev, abbr) {
        if (!has_own.call(self.abbr_desciptions, abbr)) {
            return match;
        }

        var desc = self.abbr_desciptions[abbr];
        if (!desc || desc == "") {
            return prev + self.hashPart("<abbr>" + abbr + "</abbr>");
        }

        desc = self.encodeAttribute(desc);
        return prev + self.hashPart("<abbr title=\"" + desc + "\">" + abbr + "</abbr>");
    });
};
|
|
|
|
|
|
/**
 * Export to Node.js
 *
 * Attaches the three public entry points to the top-level `this` object.
 * NOTE(review): this relies on the top-level `this` being an object, which
 * is not the case in ES modules - confirm how the file is loaded.
 */
(function(target) {
    target.Markdown = Markdown;
    target.Markdown_Parser = Markdown_Parser;
    target.MarkdownExtra_Parser = MarkdownExtra_Parser;
}(this));
|
|
|