Skip to content

Commit 7272020

Browse files
authored
Merge pull request #32 from wmde/html
Throw errors from HTML parsing
2 parents 5c214c1 + ba3f8e0 commit 7272020

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

src/HtmlParser.php

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ public function parseHtml( string $html ): DOMDocument {
2828
// LIBXML_NOBLANKS Constant excludes "ghost nodes" to avoid violating
2929
// vue's single root node constraint
3030
if ( !$document->loadHTML( '<?xml encoding="utf-8" ?>' . $html, LIBXML_NOBLANKS ) ) {
31-
//TODO Test failure
31+
throw new Exception( 'Failed to parse HTML' );
3232
}
3333

3434
/** @var LibXMLError[] $errors */
@@ -41,9 +41,15 @@ public function parseHtml( string $html ): DOMDocument {
4141
libxml_disable_entity_loader( $entityLoaderDisabled );
4242
}
4343

44+
$exception = null;
4445
foreach ( $errors as $error ) {
45-
//TODO html5 tags can fail parsing
46-
//TODO Throw an exception
46+
if ( strpos( $error->message, 'Tag template invalid' ) === 0 ) {
47+
continue;
48+
}
49+
$exception = new Exception( $error->message, $error->code, $exception );
50+
}
51+
if ( $exception !== null ) {
52+
throw $exception;
4753
}
4854

4955
return $document;

tests/php/HtmlParserTest.php

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,4 +63,11 @@ public function testTwoRootNodes() {
6363
$this->parseAndGetRootNode( '<p></p><p></p>' );
6464
}
6565

66+
public function testMalformedHtml(): void {
67+
$htmlParser = new HtmlParser();
68+
$this->expectException( Exception::class );
69+
$this->expectExceptionMessage( 'Unexpected end tag' );
70+
$htmlParser->parseHtml( '</p>' );
71+
}
72+
6673
}

0 commit comments

Comments
 (0)