Skip to content

Commit a4bdced

Browse files
committed
fix #546: Support complex entity value
1 parent a874ce7 commit a4bdced

File tree

2 files changed

+142
-71
lines changed

2 files changed

+142
-71
lines changed

spec/entities_spec.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,4 +532,50 @@ describe("XMLParser External Entites", function() {
532532

533533
expect(result).toEqual(expected);
534534
});
535+
536+
// Regression spec for issue #546: an internal entity whose replacement text
// contains markup (tags, attributes, newlines, tabs) must be substituted
// verbatim into the text of the element that references it.
// NOTE: this must be `it`, not `fit` - a focused spec disables every other
// spec in the run.
it("should support entities with tags in content", function() {
    // The replacement text is kept in one escaped string so that the exact
    // whitespace (tabs, trailing spaces) is visible and is shared by both the
    // XML input and the expected output.
    const smileMarkup = " \n \t<rect x='.5' y='.5' width='29' height='39' fill='black' stroke='red'/>\n\t\t<g transform='translate(0, 5)'> \n\t\t\t<circle cx='15' cy='15' r='10' fill='yellow'/>\n\t\t\t<circle cx='12' cy='12' r='1.5' fill='black'/>\n\t\t\t<circle cx='17' cy='12' r='1.5' fill='black'/>\n\t\t\t<path d='M 10 19 L 15 23 20 19' stroke='black' stroke-width='2'/></g>";

    const xmlData = `
    <?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" [
        <!ENTITY Smile "${smileMarkup}"
        >
    ]>
    <svg width="850px" height="700px" version="1.1" xmlns="http://www.w3.org/2000/svg">
        <g transform="matrix(16,0,0,16,0,0)">&Smile;</g></svg> `;

    const expected = {
        "?xml": {
            "version": "1.0",
            "encoding": "utf-8"
        },
        "svg": {
            "g": {
                "#text": smileMarkup,
                "transform": "matrix(16,0,0,16,0,0)"
            },
            "width": "850px",
            "height": "700px",
            "version": "1.1",
            "xmlns": "http://www.w3.org/2000/svg"
        }
    };

    const options = {
        attributeNamePrefix: "",
        ignoreAttributes: false,
        processEntities: true,
    };
    const parser = new XMLParser(options);
    const result = parser.parse(xmlData);

    expect(result).toEqual(expected);
});
535581
});

src/xmlparser/DocTypeReader.js

Lines changed: 96 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -11,80 +11,34 @@ function readDocType(xmlData, i){
1111
{
1212
i = i+9;
1313
let angleBracketsCount = 1;
14-
let hasBody = false, entity = false, comment = false;
14+
let hasBody = false, comment = false;
1515
let exp = "";
1616
for(;i<xmlData.length;i++){
17-
if (xmlData[i] === '<' && !comment) {
18-
if( hasBody &&
19-
xmlData[i+1] === '!' &&
20-
xmlData[i+2] === 'E' &&
21-
xmlData[i+3] === 'N' &&
22-
xmlData[i+4] === 'T' &&
23-
xmlData[i+5] === 'I' &&
24-
xmlData[i+6] === 'T' &&
25-
xmlData[i+7] === 'Y'
26-
){
27-
i += 7;
28-
entity = true;
29-
}else if( hasBody &&
30-
xmlData[i+1] === '!' &&
31-
xmlData[i+2] === 'E' &&
32-
xmlData[i+3] === 'L' &&
33-
xmlData[i+4] === 'E' &&
34-
xmlData[i+5] === 'M' &&
35-
xmlData[i+6] === 'E' &&
36-
xmlData[i+7] === 'N' &&
37-
xmlData[i+8] === 'T'
38-
){
39-
//Not supported
40-
i += 8;
41-
}else if( hasBody &&
42-
xmlData[i+1] === '!' &&
43-
xmlData[i+2] === 'A' &&
44-
xmlData[i+3] === 'T' &&
45-
xmlData[i+4] === 'T' &&
46-
xmlData[i+5] === 'L' &&
47-
xmlData[i+6] === 'I' &&
48-
xmlData[i+7] === 'S' &&
49-
xmlData[i+8] === 'T'
50-
){
51-
//Not supported
52-
i += 8;
53-
}else if( hasBody &&
54-
xmlData[i+1] === '!' &&
55-
xmlData[i+2] === 'N' &&
56-
xmlData[i+3] === 'O' &&
57-
xmlData[i+4] === 'T' &&
58-
xmlData[i+5] === 'A' &&
59-
xmlData[i+6] === 'T' &&
60-
xmlData[i+7] === 'I' &&
61-
xmlData[i+8] === 'O' &&
62-
xmlData[i+9] === 'N'
63-
){
64-
//Not supported
65-
i += 9;
66-
}else if( //comment
67-
xmlData[i+1] === '!' &&
68-
xmlData[i+2] === '-' &&
69-
xmlData[i+3] === '-'
70-
){
71-
comment = true;
72-
}else{
73-
throw new Error("Invalid DOCTYPE");
17+
if (xmlData[i] === '<' && !comment) { //Determine the tag type
18+
if( hasBody && isEntity(xmlData, i)){
19+
i += 7;
20+
[entityName, val,i] = readEntityExp(xmlData,i+1);
21+
if(val.indexOf("&") === -1) //Parameter entities are not supported
22+
entities[ entityName ] = {
23+
regx : RegExp( `&${entityName};`,"g"),
24+
val: val
25+
};
7426
}
27+
else if( hasBody && isElement(xmlData, i)) i += 8;//Not supported
28+
else if( hasBody && isAttlist(xmlData, i)) i += 8;//Not supported
29+
else if( hasBody && isNotation(xmlData, i)) i += 9;//Not supported
30+
else if( isComment) comment = true;
31+
else throw new Error("Invalid DOCTYPE");
32+
7533
angleBracketsCount++;
7634
exp = "";
77-
} else if (xmlData[i] === '>') {
35+
} else if (xmlData[i] === '>') { //Read tag content
7836
if(comment){
7937
if( xmlData[i - 1] === "-" && xmlData[i - 2] === "-"){
8038
comment = false;
8139
angleBracketsCount--;
8240
}
8341
}else{
84-
if(entity) {
85-
parseEntityExp(exp, entities);
86-
entity = false;
87-
}
8842
angleBracketsCount--;
8943
}
9044
if (angleBracketsCount === 0) {
@@ -105,14 +59,85 @@ function readDocType(xmlData, i){
10559
return {entities, i};
10660
}
10761

108-
const entityRegex = RegExp("^\\s([a-zA-z0-0]+)[ \t](['\"])([^&]+)\\2");
109-
function parseEntityExp(exp, entities){
110-
const match = entityRegex.exec(exp);
111-
if(match){
112-
entities[ match[1] ] = {
113-
regx : RegExp( `&${match[1]};`,"g"),
114-
val: match[3]
115-
};
62+
/**
 * Reads one internal entity declaration: the entity name followed by its
 * quoted replacement text.
 *
 * Supported:      <!ENTITY entityname "replacement text">
 * Not supported:  <!ENTITY ext SYSTEM "http://normal-website.com" >  (external; rejected here)
 * Not supported:  <!ENTITY entityname "&anotherElement;">  (value is returned as-is;
 *                 filtering on "&" is left to the caller)
 *
 * @param {string} xmlData - the XML text being parsed
 * @param {number} i - index at which the entity name starts (leading whitespace allowed)
 * @returns {[string, string, number]} `[entityName, val, i]` where the returned index is
 *          the position of the closing quote, or `xmlData.length` when the value is
 *          unterminated, or one past the scan end when no opening quote exists
 * @throws {Error} when anything other than a single name precedes the opening quote
 *          (e.g. `name SYSTEM`), which is how external entities appear
 */
function readEntityExp(xmlData,i){
    //read EntityName: everything up to the first quote character
    let quoteAt = i;
    while (quoteAt < xmlData.length && xmlData[quoteAt] !== "'" && xmlData[quoteAt] !== '"') {
        quoteAt++;
    }
    const entityName = xmlData.substring(i, quoteAt).trim();

    // Any whitespace left inside the trimmed name (space, tab or line break)
    // means extra tokens such as SYSTEM/PUBLIC precede the quote.
    if (/\s/.test(entityName)) throw new Error("External entites are not supported");

    //read Entity Value: up to the matching closing quote
    const startChar = xmlData[quoteAt];
    const valStart = quoteAt + 1;
    let valEnd;
    if (startChar === undefined) {
        // No opening quote at all: nothing to read.
        valEnd = valStart;
    } else {
        const closingAt = xmlData.indexOf(startChar, valStart);
        // Unterminated value: consume the rest of the input.
        valEnd = closingAt === -1 ? xmlData.length : closingAt;
    }
    const val = xmlData.substring(valStart, valEnd);

    return [entityName, val, valEnd];
}
90+
91+
/**
 * Tells whether the three characters following index `i` are `!--`,
 * i.e. whether `xmlData[i]` opens a comment.
 *
 * @param {string} xmlData - the XML text being scanned
 * @param {number} i - index of the character preceding the `!`
 * @returns {boolean} true only when all three characters match
 */
function isComment(xmlData, i){
    const marker = "!--";
    for (let offset = 0; offset < marker.length; offset++) {
        if (xmlData[i + (offset + 1)] !== marker[offset]) return false;
    }
    return true;
}
97+
/**
 * Tells whether the characters following index `i` spell `!ENTITY`.
 *
 * @param {string} xmlData - the XML text being scanned
 * @param {number} i - index of the character preceding the `!`
 * @returns {boolean} true only when all seven characters match
 */
function isEntity(xmlData, i){
    return [..."!ENTITY"].every((expected, offset) => xmlData[i + (offset + 1)] === expected);
}
107+
/**
 * Tells whether the characters following index `i` spell `!ELEMENT`.
 *
 * @param {string} xmlData - the XML text being scanned
 * @param {number} i - index of the character preceding the `!`
 * @returns {boolean} true only when all eight characters match
 */
function isElement(xmlData, i){
    return xmlData[i+1] === '!'
        && xmlData[i+2] === 'E'
        && xmlData[i+3] === 'L'
        && xmlData[i+4] === 'E'
        && xmlData[i+5] === 'M'
        && xmlData[i+6] === 'E'
        && xmlData[i+7] === 'N'
        && xmlData[i+8] === 'T';
}
118+
119+
/**
 * Tells whether the characters following index `i` spell `!ATTLIST`.
 *
 * @param {string} xmlData - the XML text being scanned
 * @param {number} i - index of the character preceding the `!`
 * @returns {boolean} true only when all eight characters match
 */
function isAttlist(xmlData, i){
    const keyword = "!ATTLIST";
    let matched = 0;
    while (matched < keyword.length && xmlData[i + (matched + 1)] === keyword[matched]) {
        matched++;
    }
    return matched === keyword.length;
}
130+
/**
 * Tells whether the characters following index `i` spell `!NOTATION`.
 *
 * @param {string} xmlData - the XML text being scanned
 * @param {number} i - index of the character preceding the `!`
 * @returns {boolean} true only when all nine characters match
 */
function isNotation(xmlData, i){
    const keyword = "!NOTATION";
    for (let offset = 0; offset < keyword.length; offset++) {
        const actual = xmlData[i + (offset + 1)];
        if (actual !== keyword[offset]) {
            return false;
        }
    }
    return true;
}
142+
118143
module.exports = readDocType;

0 commit comments

Comments
 (0)