Skip to content

Commit 33d55cd

Browse files
committed
Merge pull request fb55#28 from myndzi/master
basic support for implied close tags, bugfix for attribute values containing a slash at the end being recognized as self-closing tags.
2 parents f707bd7 + fe6b8d6 commit 33d55cd

File tree

3 files changed

+193
-11
lines changed

3 files changed

+193
-11
lines changed

lib/Parser.js

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,30 @@ function Parser(cbs, options){
1313
}
1414

1515
//Regular expressions used for cleaning up and parsing (stateless)
16-
var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
16+
17+
/* http://dev.w3.org/html5/html-author/#attributes
18+
* - Whitespace is permitted after the tag name, but it is not permitted before the tag name.
19+
* - Attribute names must consist of one or more characters other than the space characters,
20+
* control characters, NULL, one of the characters: double quote ("), single quote ('),
21+
* greater-than sign (>), solidus (/), equals sign (=), nor any characters that are not defined by Unicode.
22+
* - An empty attribute is one where the value has been omitted (e.g. <input disabled>).
23+
* - An unquoted attribute value must not contain any literal space characters, any of the characters:
24+
* double quote ("), apostrophe ('), equals sign (=), less-than sign (<), greater-than sign (>),
25+
* or grave accent (`), and the value must not be the empty string.
26+
* - There may be space characters between the attribute name and the equals sign (=),
27+
* and between that and the attribute value.
28+
* - Double-quoted attributes must not contain any double-quote characters or ambiguous ampersands.
29+
* - Single-quoted attributes must not contain any single-quote characters or ambiguous ampersands.
30+
*/
31+
// element name: (<[^<& ]+)
32+
// attribute name: \s+([^"'=>\/\s]+)
33+
// attribute value: (\s*=\s*(?:
34+
// "([^"]*)"|
35+
// '([^']*)'|
36+
// ([^\s"'=<>`]+))
37+
// tag end: (?=\s)|\/|$)
38+
39+
var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=\s)|\/|$)/g,
1740
_reTail = /\s|\/|$/;
1841

1942
var defaultOpts = {
@@ -44,6 +67,32 @@ var defaultCbs = {
4467
*/
4568
};
4669

70+
var formTags = {
71+
input: true,
72+
option: true,
73+
optgroup: true,
74+
select: true,
75+
button: true,
76+
datalist: true,
77+
textarea: true
78+
};
79+
var openImpliesClose = {
80+
tr : { tr:true, th:true, td:true },
81+
th : { th:true },
82+
td : { thead:true, td:true },
83+
body : { head:true, link:true, script:true },
84+
li : { li:true },
85+
p : { p:true },
86+
select : formTags,
87+
input : formTags,
88+
output : formTags,
89+
button : formTags,
90+
datalist: formTags,
91+
textarea: formTags,
92+
option : { option:true },
93+
optgroup: { optgroup:true }
94+
};
95+
4796
//Parses a complete HTML and pushes it to the handler
4897
Parser.prototype.parseComplete = function(data){
4998
this.reset();
@@ -303,25 +352,32 @@ var parseAttributes = function(data, lcNames){
303352

304353
Parser.prototype._processOpenTag = function(data){
305354
var name = this._parseTagName(data),
306-
type = ElementType.Tag;
355+
attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
356+
type = ElementType.Tag;
307357

308358
if(this._options.xmlMode){ /*do nothing*/ }
309359
else if(name === "script") type = ElementType.Script;
310360
else if(name === "style") type = ElementType.Style;
311-
312-
if(this._cbs.onopentagname) this._cbs.onopentagname(name);
313-
if(this._cbs.onopentag){
314-
this._cbs.onopentag(name, parseAttributes(
315-
data, this._options.lowerCaseAttributeNames
316-
));
361+
if (!this._options.xmlMode && name in openImpliesClose) {
362+
var el;
363+
while ((el = this._stack[this._stack.length-1]) in openImpliesClose[name]) {
364+
this._processCloseTag(el);
365+
}
317366
}
367+
if(this._cbs.onopentagname) this._cbs.onopentagname(name);
368+
if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
318369
if(this._cbs.onattribute){
319370
this._parseAttributes(data, this._options.lowerCaseAttributeNames);
320371
}
321-
if(this._cbs.onopentagend) this._cbs.onopentagend();
322372

323373
//If tag self-terminates, add an explicit, separate closing tag
324-
if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
374+
/* http://dev.w3.org/html5/html-author/#tags
375+
* In XHTML, self-closing tags are valid but attribute values must be quoted.
376+
* In HTML, self-closing tags must be either void elements or foreign elements.
377+
* Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
378+
* Foreign elements use XML rules
379+
*/
380+
if((!this._options.xmlMode && name in emptyTags) || (data.substr(-1) === "/" && data.replace(_reAttrib, "").substr(-1) === "/")){
325381
if(this._cbs.onclosetag) this._cbs.onclosetag(name);
326382
} else {
327383
if(type !== ElementType.Tag){
@@ -338,4 +394,4 @@ Parser.prototype._handleError = function(error){
338394
else throw error;
339395
};
340396

341-
module.exports = Parser;
397+
module.exports = Parser;

tests/Events/07-self-closing.json

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"name": "Self-closing tags",
3+
"options": {
4+
"handler": {
5+
6+
},
7+
"parser": {
8+
9+
}
10+
},
11+
"html": "<a href=http://test.com/>Foo</a><hr />",
12+
"expected": [
13+
{
14+
"event": "opentagname",
15+
"data": [
16+
"a"
17+
]
18+
},
19+
{
20+
"event": "opentag",
21+
"data": [
22+
"a",
23+
{
24+
"href": "http://test.com/"
25+
}
26+
]
27+
},
28+
{
29+
"event": "attribute",
30+
"data": [
31+
"href",
32+
"http://test.com/"
33+
]
34+
},
35+
{
36+
"event": "text",
37+
"data": [
38+
"Foo"
39+
]
40+
},
41+
{
42+
"event": "closetag",
43+
"data": [
44+
"a"
45+
]
46+
},
47+
{
48+
"event": "opentagname",
49+
"data": [
50+
"hr"
51+
]
52+
},
53+
{
54+
"event": "opentag",
55+
"data": [
56+
"hr",
57+
{}
58+
]
59+
},
60+
{
61+
"event": "closetag",
62+
"data": [
63+
"hr"
64+
]
65+
}
66+
]
67+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"name": "Implicit close tags",
3+
"options": {},
4+
"html": "<ol><li class=test><div><table style=width:100%><tr><td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol>",
5+
"expected": [
6+
{ "event": "opentagname", "data": [ "ol" ] },
7+
{ "event": "opentag", "data": [ "ol", {} ] },
8+
{ "event": "opentagname", "data": [ "li" ] },
9+
{ "event": "opentag", "data": [ "li", { "class": "test" } ] },
10+
{ "event": "attribute", "data": [ "class", "test" ] },
11+
{ "event": "opentagname", "data": [ "div" ] },
12+
{ "event": "opentag", "data": [ "div", {} ] },
13+
{ "event": "opentagname", "data": [ "table" ] },
14+
{ "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
15+
{ "event": "attribute", "data": [ "style", "width:100%" ] },
16+
{ "event": "opentagname", "data": [ "tr" ] },
17+
{ "event": "opentag", "data": [ "tr", {} ] },
18+
{ "event": "opentagname", "data": [ "td" ] },
19+
{ "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
20+
{ "event": "attribute", "data": [ "colspan", "2" ] },
21+
{ "event": "opentagname", "data": [ "h3" ] },
22+
{ "event": "opentag", "data": [ "h3", {} ] },
23+
{ "event": "text", "data": [ "Heading" ] },
24+
{ "event": "closetag", "data": [ "h3" ] },
25+
{ "event": "closetag", "data": [ "td" ] },
26+
{ "event": "closetag", "data": [ "tr" ] },
27+
{ "event": "opentagname", "data": [ "tr" ] },
28+
{ "event": "opentag", "data": [ "tr", {} ] },
29+
{ "event": "opentagname", "data": [ "td" ] },
30+
{ "event": "opentag", "data": [ "td", {} ] },
31+
{ "event": "opentagname", "data": [ "div" ] },
32+
{ "event": "opentag", "data": [ "div", {} ] },
33+
{ "event": "text", "data": [ "Div" ] },
34+
{ "event": "closetag", "data": [ "div" ] },
35+
{ "event": "closetag", "data": [ "td" ] },
36+
{ "event": "opentagname", "data": [ "td" ] },
37+
{ "event": "opentag", "data": [ "td", {} ] },
38+
{ "event": "opentagname", "data": [ "div" ] },
39+
{ "event": "opentag", "data": [ "div", {} ] },
40+
{ "event": "text", "data": [ "Div2" ] },
41+
{ "event": "closetag", "data": [ "div" ] },
42+
{ "event": "closetag", "data": [ "td" ] },
43+
{ "event": "closetag", "data": [ "tr" ] },
44+
{ "event": "closetag", "data": [ "table" ] },
45+
{ "event": "closetag", "data": [ "div" ] },
46+
{ "event": "closetag", "data": [ "li" ] },
47+
{ "event": "opentagname", "data": [ "li" ] },
48+
{ "event": "opentag", "data": [ "li", {} ] },
49+
{ "event": "opentagname", "data": [ "div" ] },
50+
{ "event": "opentag", "data": [ "div", {} ] },
51+
{ "event": "opentagname", "data": [ "h3" ] },
52+
{ "event": "opentag", "data": [ "h3", {} ] },
53+
{ "event": "text", "data": [ "Heading 2" ] },
54+
{ "event": "closetag", "data": [ "h3" ] },
55+
{ "event": "closetag", "data": [ "div" ] },
56+
{ "event": "closetag", "data": [ "li" ] },
57+
{ "event": "closetag", "data": [ "ol" ] }
58+
]
59+
}

0 commit comments

Comments
 (0)