-
Notifications
You must be signed in to change notification settings - Fork 3.5k
move processing to output function #1227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -554,68 +554,9 @@ inline.normal = merge({}, inline); | |
| inline.pedantic = merge({}, inline.normal, { | ||
| strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, | ||
| em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/, | ||
| /* Original link re: /^!?\[(label)\]\(\s*<?([\s\S]*?)>?(?:\s+(['"][\s\S]*?['"]))?\s*\)/ | ||
| * This captures the spec reasonably well but is vulnerable to REDOS. | ||
| * Instead we use a custom parser that follows the RegExp.exec semantics. */ | ||
| link: { | ||
| exec: function (s) { | ||
| // [TEXT](DESTINATION) | ||
| var generalLinkRe = edit(/^!?\[(label)\]\((.*?)\)/) | ||
| .replace('label', inline._label) | ||
| .getRegex(); | ||
|
|
||
| // destination: DESTINATION from generalLinkRe | ||
| // returns [destination, title]: no angle-brackets on destination, no quotes on title | ||
| function splitIntoDestinationAndTitle (destination) { | ||
| function unwrapAngleBrackets (str) { | ||
| if (str.match(/^<.*>$/)) { | ||
| str = str.slice(1, -1); | ||
| } | ||
| return str; | ||
| } | ||
|
|
||
| // Valid DESTINATIONs, in decreasing specificity. | ||
| var destinationAndTitleRe = /^([^'"(]*[^\s])\s+(['"(].*['")])/; | ||
| var destinationRe = /^(<?[\s\S]*>?)/; | ||
| var parsingRegexes = [destinationAndTitleRe, destinationRe]; | ||
|
|
||
| var match = false; | ||
| for (var i = 0; i < parsingRegexes.length; i++) { | ||
| match = parsingRegexes[i].exec(destination); | ||
| if (match) { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| if (!match) { | ||
| return null; | ||
| } | ||
|
|
||
| var dest = match[1]; | ||
| var title = match[2] || ''; // Not all parsingRegexes have 2 groups. | ||
|
|
||
| // Format dest. | ||
| dest = dest.trim(); | ||
| dest = unwrapAngleBrackets(dest); | ||
|
|
||
| return [dest, title]; | ||
| } | ||
|
|
||
| var fullMatch = generalLinkRe.exec(s); | ||
| if (!fullMatch) { | ||
| return null; | ||
| } | ||
|
|
||
| var text = fullMatch[1]; | ||
| var destination = fullMatch[2]; | ||
|
|
||
| var destinationAndTitle = splitIntoDestinationAndTitle(destination); | ||
| if (!destinationAndTitle) { | ||
| return null; | ||
| } | ||
| return [fullMatch[0], text, destinationAndTitle[0], destinationAndTitle[1]]; | ||
| } | ||
| }, | ||
| link: edit(/^!?\[(label)\]\((.*?)\)/) | ||
| .replace('label', inline._label) | ||
| .getRegex(), | ||
| reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) | ||
| .replace('label', inline._label) | ||
| .getRegex() | ||
|
|
@@ -762,8 +703,18 @@ InlineLexer.prototype.output = function(src) { | |
| src = src.substring(cap[0].length); | ||
| this.inLink = true; | ||
| href = cap[2]; | ||
| href = href[0] === '<' ? href.substring(1, href.length - 1) : href; | ||
| title = cap[3] ? cap[3].substring(1, cap[3].length - 1) : cap[3]; | ||
| if (this.options.pedantic) { | ||
| link = /^([^'"(]*[^\s])\s+(['"(].*['")])/.exec(href); | ||
|
||
|
|
||
| if (link) { | ||
| href = link[1]; | ||
| title = link[2].trim().slice(1, -1); | ||
|
||
| } | ||
| href = href.trim(); | ||
| } else { | ||
| title = cap[3] ? cap[3].slice(1, -1) : cap[3]; | ||
| } | ||
| href = href.replace(/^<([\s\S]*)>$/, '$1'); | ||
| out += this.outputLink(cap, { | ||
| href: InlineLexer.escapes(href), | ||
| title: InlineLexer.escapes(title) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| <p><a href="%3Ctest">URL</a></p> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| [URL](<test) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't test for pedantic much, and I don't see it well documented anywhere. The only docs I could find are in the man page.
Can you explain when I should test for pedantic in the future?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I understand this code right, without `pedantic` we will set `href` but not `title`. If my understanding is correct, why is this behavior desirable?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `pedantic` flag means to follow the original 2004 spec from John Gruber (Daring Fireball).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The regexes are grouped based on the options. The link regex that you fixed was in the pedantic group.
The title is set in the else clause. The regex for a non-pedantic link is
`/^!?\[(label)\]\(href(?:\s+(title))?\s*\)/`, which is extremely complex when constructed and probably also error-prone.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is the actual non-pedantic link regex:
```
/^!?\[((?:\[[^\[\]]*\]|\\[\[\]]?|`[^`]*`|[^\[\]\\])*?)\]\(\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)(?:\s+("(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)))?\s*\)/
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The link regex is checking for a title. If no title is found then the href is already set to the whole string inside the parentheses and there is no title.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The regex that you used before when there was no title (`/^(<?[\s\S]*>?)/`) literally matches any string, and since `href` is already set to the whole string there is no need to change it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Further down we then set the `title` field in the returned object based on the (undefined?) value of `title`. Do we want to set a default value for `title`, e.g. `''`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My two cents would be an empty value of expected type. Avoid null and undefined checks and possibilities.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see what you are saying. Yes, `title` should probably be defined as `null` or `''`. It looks like CommonMark doesn't differentiate between the title being an empty string or no title: demo
so I will set it to an empty string.