Skip to content

Commit 277db2e

Browse files
committed
dep: change java html dep to neko-htmlunit
and update the implementation and the tests
1 parent a00e0d4 commit 277db2e

18 files changed

+49
-36
lines changed

ext/java/nokogiri/Html4ElementDescription.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import java.util.List;
77
import java.util.Map;
88

9-
import org.cyberneko.html.HTMLElements;
9+
import net.sourceforge.htmlunit.cyberneko.HTMLElements;
1010
import org.jruby.Ruby;
1111
import org.jruby.RubyClass;
1212
import org.jruby.RubyObject;
@@ -24,6 +24,7 @@
2424
public class Html4ElementDescription extends RubyObject
2525
{
2626
private static final long serialVersionUID = 1L;
27+
private static final HTMLElements htmlElements_ = new HTMLElements();
2728

2829
/**
2930
* Stores memoized hash of element -> list of valid subelements.
@@ -63,9 +64,8 @@ public class Html4ElementDescription extends RubyObject
6364
* the list of elements directly because it's protected.
6465
*/
6566
for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
66-
HTMLElements.Element maybe_sub =
67-
HTMLElements.getElement(c);
68-
if (maybe_sub.isParent(elem)) {
67+
HTMLElements.Element maybe_sub = htmlElements_.getElement(c);
68+
if (maybe_sub != null && maybe_sub.isParent(elem)) {
6969
subs.add(maybe_sub.name);
7070
}
7171
}
@@ -82,11 +82,10 @@ public class Html4ElementDescription extends RubyObject
8282
IRubyObject klazz, IRubyObject name)
8383
{
8484

85-
// nekohtml will return an element even for invalid names, see
86-
// http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
87-
// which breaks `test_fetch_nonexistent'
88-
HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
89-
if (elem == HTMLElements.NO_SUCH_ELEMENT) {
85+
// nekohtml returns an element even for invalid names, which breaks `test_fetch_nonexistent`,
86+
// so pass NO_SUCH_ELEMENT as the fallback and compare against it; see getElement() in HTMLElements.java
87+
HTMLElements.Element elem = htmlElements_.getElement(name.asJavaString(), htmlElements_.NO_SUCH_ELEMENT);
88+
if (elem == htmlElements_.NO_SUCH_ELEMENT) {
9089
return context.nil;
9190
}
9291

ext/java/nokogiri/Html4EntityLookup.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import static org.jruby.runtime.Helpers.invoke;
44

5-
import org.cyberneko.html.HTMLEntities;
5+
import net.sourceforge.htmlunit.cyberneko.HTMLEntitiesParser;
66
import org.jruby.Ruby;
77
import org.jruby.RubyClass;
88
import org.jruby.RubyObject;
@@ -38,8 +38,18 @@ public class Html4EntityLookup extends RubyObject
3838
{
3939
Ruby ruby = context.getRuntime();
4040
String name = key.toString();
41-
int val = HTMLEntities.get(name);
42-
if (val == -1) { return ruby.getNil(); }
41+
42+
HTMLEntitiesParser parser = new HTMLEntitiesParser();
43+
for (int j = 0 ; j < name.length() ; j++) {
44+
if (!parser.parse(name.charAt(j))) {
45+
break;
46+
}
47+
}
48+
String match = parser.getMatch();
49+
50+
if (match == null) { return ruby.getNil(); }
51+
52+
int val = match.charAt(0);
4353

4454
IRubyObject edClass =
4555
ruby.getClassFromPath("Nokogiri::HTML4::EntityDescription");

ext/java/nokogiri/Html4SaxParserContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import java.util.regex.Pattern;
1010

1111
import org.apache.xerces.parsers.AbstractSAXParser;
12-
import org.cyberneko.html.parsers.SAXParser;
12+
import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser;
1313
import org.jruby.Ruby;
1414
import org.jruby.RubyClass;
1515
import org.jruby.RubyFixnum;

ext/java/nokogiri/NokogiriService.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ public class NokogiriService implements BasicLibraryService
8686
RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX");
8787
RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT");
8888

89-
createJavaLibraryVersionConstants(ruby, nokogiri);
9089
createNokogiriModule(ruby, nokogiri);
9190
createSyntaxErrors(ruby, nokogiri, xmlModule);
9291
RubyClass xmlNode = createXmlModule(ruby, xmlModule);
@@ -97,12 +96,6 @@ public class NokogiriService implements BasicLibraryService
9796
nokogiri.setInternalVariable("cache", populateNokogiriClassCahce(ruby));
9897
}
9998

100-
private void
101-
createJavaLibraryVersionConstants(Ruby ruby, RubyModule nokogiri)
102-
{
103-
nokogiri.defineConstant("NEKO_VERSION", ruby.newString(org.cyberneko.html.Version.getVersion()));
104-
}
105-
10699
private void
107100
createNokogiriModule(Ruby ruby, RubyModule nokogiri)
108101
{

ext/java/nokogiri/internals/HtmlDomParserContext.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
import org.apache.xerces.xni.XNIException;
1616
import org.apache.xerces.xni.parser.XMLDocumentFilter;
1717
import org.apache.xerces.xni.parser.XMLParserConfiguration;
18-
import org.cyberneko.html.HTMLConfiguration;
19-
import org.cyberneko.html.filters.DefaultFilter;
18+
import net.sourceforge.htmlunit.cyberneko.HTMLConfiguration;
19+
import net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter;
2020
import org.jruby.Ruby;
2121
import org.jruby.RubyClass;
2222
import org.jruby.runtime.ThreadContext;

ext/java/nokogiri/internals/SaveContextVisitor.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import java.util.regex.Matcher;
1313
import java.util.regex.Pattern;
1414

15-
import org.cyberneko.html.HTMLElements;
15+
import net.sourceforge.htmlunit.cyberneko.HTMLElements;
1616
import org.w3c.dom.Attr;
1717
import org.w3c.dom.CDATASection;
1818
import org.w3c.dom.Comment;
@@ -81,6 +81,8 @@ public class SaveContextVisitor
8181
public static final int SUBSETS = 8;
8282
public static final int EXCLUSIVE = 16;
8383

84+
private static final HTMLElements htmlElements_ = new HTMLElements();
85+
8486
public
8587
SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment,
8688
int canonicalOpts)
@@ -498,7 +500,7 @@ public class SaveContextVisitor
498500
private boolean
499501
isEmpty(String name)
500502
{
501-
HTMLElements.Element element = HTMLElements.getElement(name);
503+
HTMLElements.Element element = htmlElements_.getElement(name);
502504
return element.isEmpty();
503505
}
504506

lib/nokogiri/jruby/dependencies.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
# should skip loading xml jars. This is because those are in WEB-INF/lib and
1010
# already set in the classpath.
1111
unless $LOAD_PATH.to_s.include?("appengine-rack")
12-
require "nekohtml.jar"
1312
require "nekodtd.jar"
1413
end
1514

lib/nokogiri/jruby/nekohtml.jar

-124 KB
Binary file not shown.
368 KB
Binary file not shown.

lib/nokogiri/jruby/nokogiri_jars.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
99
require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
1010
require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
11+
require 'net/sourceforge/htmlunit/neko-htmlunit/2.61.0/neko-htmlunit-2.61.0.jar'
1112
require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
1213
end
1314

@@ -18,11 +19,13 @@
1819
require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
1920
require_jar 'xml-apis', 'xml-apis', '1.4.01'
2021
require_jar 'xalan', 'serializer', '2.7.2'
22+
require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.61.0'
2123
require_jar 'isorelax', 'isorelax', '20030108'
2224
end
2325

2426
# generated by the :vendor_jars rake task
2527
module Nokogiri
26-
JAR_DEPENDENCIES = {"isorelax"=>"isorelax:isorelax:20030108", "jing"=>"nu.validator:jing:20200702VNU", "serializer"=>"xalan:serializer:2.7.2", "xalan"=>"xalan:xalan:2.7.2", "xercesImpl"=>"xerces:xercesImpl:2.12.2", "xml-apis"=>"xml-apis:xml-apis:1.4.01"}.freeze
28+
JAR_DEPENDENCIES = {"isorelax"=>"isorelax:isorelax:20030108", "neko-htmlunit"=>"net.sourceforge.htmlunit:neko-htmlunit:2.61.0", "jing"=>"nu.validator:jing:20200702VNU", "serializer"=>"xalan:serializer:2.7.2", "xalan"=>"xalan:xalan:2.7.2", "xercesImpl"=>"xerces:xercesImpl:2.12.2", "xml-apis"=>"xml-apis:xml-apis:1.4.01"}.freeze
2729
XERCES_VERSION = JAR_DEPENDENCIES["xercesImpl"]
30+
NEKO_VERSION = JAR_DEPENDENCIES["neko-htmlunit"]
2831
end

0 commit comments

Comments
 (0)