File tree Expand file tree Collapse file tree 4 files changed +60
-9
lines changed
Expand file tree Collapse file tree 4 files changed +60
-9
lines changed Original file line number Diff line number Diff line change @@ -182,6 +182,46 @@ def force_correct_attribute_escaping!(node)
182182 end . force_encoding ( encoding )
183183 end
184184 end
185+
186+ def cdata_needs_escaping? ( node )
187+ # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
188+ node . cdata? || ( Nokogiri . jruby? && node . text? && ( node . parent . name == "style" || node . parent . name == "script" ) )
189+ end
190+
191+ def cdata_escape ( node )
192+ escaped_text = escape_tags ( node . text )
193+ if Nokogiri . jruby?
194+ node . document . create_text_node ( escaped_text )
195+ else
196+ node . document . create_cdata ( escaped_text )
197+ end
198+ end
199+
200+ TABLE_FOR_ESCAPE_HTML__ = {
201+ '<' => '<' ,
202+ '>' => '>' ,
203+ '&' => '&' ,
204+ }
205+
206+ def escape_tags ( string )
207+ # modified version of CGI.escapeHTML from ruby 3.1
208+ enc = string . encoding
209+ unless enc . ascii_compatible?
210+ if enc . dummy?
211+ origenc = enc
212+ enc = Encoding ::Converter . asciicompat_encoding ( enc )
213+ string = enc ? string . encode ( enc ) : string . b
214+ end
215+ table = Hash [ TABLE_FOR_ESCAPE_HTML__ . map { |pair |pair . map { |s |s . encode ( enc ) } } ]
216+ string = string . gsub ( /#{ "[<>&]" . encode ( enc ) } / , table )
217+ string . encode! ( origenc ) if origenc
218+ string
219+ else
220+ string = string . b
221+ string . gsub! ( /[<>&]/ , TABLE_FOR_ESCAPE_HTML__ )
222+ string . force_encoding ( enc )
223+ end
224+ end
185225 end
186226 end
187227 end
Original file line number Diff line number Diff line change @@ -108,6 +108,10 @@ def html5lib_sanitize(node)
108108 return Scrubber ::CONTINUE
109109 end
110110 when Nokogiri ::XML ::Node ::TEXT_NODE , Nokogiri ::XML ::Node ::CDATA_SECTION_NODE
111+ if HTML5 ::Scrub . cdata_needs_escaping? ( node )
112+ node . before ( HTML5 ::Scrub . cdata_escape ( node ) )
113+ return Scrubber ::STOP
114+ end
111115 return Scrubber ::CONTINUE
112116 end
113117 Scrubber ::STOP
Original file line number Diff line number Diff line change @@ -100,13 +100,9 @@ def initialize
100100
101101 def scrub ( node )
102102 return CONTINUE if html5lib_sanitize ( node ) == CONTINUE
103- if node . children . length == 1 && node . children . first . cdata?
104- sanitized_text = Loofah . fragment ( node . children . first . to_html ) . scrub! ( :strip ) . to_html
105- node . before Nokogiri ::XML ::Text . new ( sanitized_text , node . document )
106- else
107- node . before node . children
108- end
103+ node . before ( node . children )
109104 node . remove
105+ return STOP
110106 end
111107 end
112108
Original file line number Diff line number Diff line change @@ -100,17 +100,28 @@ def test_return_empty_string_when_nothing_left
100100 end
101101
102102 def test_nested_script_cdata_tags_should_be_scrubbed
103- html = "<script><script src=' malicious.js'> </script>"
103+ html = "<script><script src=\" malicious.js\" >this & that </script>"
104104 stripped = Loofah . fragment ( html ) . scrub! ( :strip )
105+
105106 assert_empty stripped . xpath ( "//script" )
106- refute_match ( "< script", stripped . to_html )
107+ assert_equal ( "< script src= \" malicious.js \" >this & that ", stripped . to_html )
107108 end
108109
109110 def test_nested_script_cdata_tags_should_be_scrubbed_2
110111 html = "<script><script>alert('a');</script></script>"
111112 stripped = Loofah . fragment ( html ) . scrub! ( :strip )
113+
112114 assert_empty stripped . xpath ( "//script" )
113- refute_match ( "<script" , stripped . to_html )
115+ assert_equal ( "<script>alert('a');" , stripped . to_html )
116+ end
117+
118+ def test_nested_script_cdata_tags_should_be_scrubbed_max_recursion
119+ n = 40
120+ html = "<div>" + ( "<script>" * n ) + "alert(1);" + ( "</script>" * n ) + "</div>"
121+ expected = "<div>" + ( "<script>" * ( n -1 ) ) + "alert(1);</div>"
122+ actual = Loofah . fragment ( html ) . scrub! ( :strip ) . to_html
123+
124+ assert_equal ( expected , actual )
114125 end
115126
116127 def test_removal_of_all_tags
You can’t perform that action at this time.
0 commit comments