1
1
# encoding: utf-8
2
2
3
+ require 'set'
4
+
3
5
class Sanitize ; module Transformers ; class CleanElement
4
6
5
7
# Matches a valid HTML5 data attribute name. The unicode ranges included here
@@ -24,21 +26,28 @@ class Sanitize; module Transformers; class CleanElement
24
26
REGEX_PROTOCOL = /\A([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
25
27
26
28
def initialize ( config )
27
- @config = config
28
-
29
- # For faster lookups.
30
29
@add_attributes = config [ :add_attributes ]
31
- @allowed_elements = Set . new ( config [ :elements ] )
32
- @attributes = config [ :attributes ]
30
+ @attributes = config [ :attributes ] . dup
31
+ @elements = Set . new ( config [ :elements ] )
33
32
@protocols = config [ :protocols ]
34
33
@remove_all_contents = false
35
34
@remove_element_contents = Set . new
36
- @whitespace_elements = Hash . new
35
+ @whitespace_elements = { }
36
+
37
+ if @attributes . include? ( :all )
38
+ @attributes [ :all ] = Set . new ( @attributes [ :all ] )
39
+ end
40
+
41
+ @attributes . each do |element_name , attrs |
42
+ unless element_name == :all
43
+ @attributes [ element_name ] = Set . new ( attrs ) . merge ( @attributes [ :all ] || [ ] )
44
+ end
45
+ end
37
46
38
- # Converting :whitespace_element into a Hash for backwards compatibility .
47
+ # Backcompat: if :whitespace_elements is an array, convert it to a hash .
39
48
if config [ :whitespace_elements ] . is_a? ( Array )
40
49
config [ :whitespace_elements ] . each do |element |
41
- @whitespace_elements [ element ] = { :before => ' ' , :after => ' ' }
50
+ @whitespace_elements [ element ] = { :before => ' ' , :after => ' ' }
42
51
end
43
52
else
44
53
@whitespace_elements = config [ :whitespace_elements ]
@@ -55,10 +64,10 @@ def call(env)
55
64
name = env [ :node_name ]
56
65
node = env [ :node ]
57
66
58
- return if env [ :is_whitelisted ] || ! node . element?
67
+ return if node . type != Nokogiri :: XML :: Node :: ELEMENT_NODE || env [ :is_whitelisted ]
59
68
60
69
# Delete any element that isn't in the config whitelist.
61
- unless @allowed_elements . include? ( name )
70
+ unless @elements . include? ( name )
62
71
# Elements like br, div, p, etc. need to be replaced with whitespace in
63
72
# order to preserve readability.
64
73
if @whitespace_elements . include? ( name )
@@ -77,21 +86,33 @@ def call(env)
77
86
return
78
87
end
79
88
80
- attr_whitelist = Set . new ( ( @attributes [ name ] || [ ] ) +
81
- ( @attributes [ :all ] || [ ] ) )
82
-
83
- allow_data_attributes = attr_whitelist . include? ( :data )
89
+ attr_whitelist = @attributes [ name ] || @attributes [ :all ]
84
90
85
- if attr_whitelist . empty ?
91
+ if attr_whitelist . nil ?
86
92
# Delete all attributes from elements with no whitelisted attributes.
87
93
node . attribute_nodes . each { |attr | attr . unlink }
88
94
else
95
+ allow_data_attributes = attr_whitelist . include? ( :data )
96
+
89
97
# Delete any attribute that isn't allowed on this element.
90
98
node . attribute_nodes . each do |attr |
91
99
attr_name = attr . name . downcase
92
100
93
- unless attr_whitelist . include? ( attr_name )
94
- # The attribute isn't explicitly whitelisted.
101
+ if attr_whitelist . include? ( attr_name )
102
+ # The attribute is whitelisted.
103
+
104
+ # Remove any attributes that use unacceptable protocols.
105
+ if @protocols . include? ( name ) && @protocols [ name ] . include? ( attr_name )
106
+ attr_protocols = @protocols [ name ] [ attr_name ]
107
+
108
+ if attr . value . to_s . downcase =~ REGEX_PROTOCOL
109
+ attr . unlink unless attr_protocols . include? ( $1. downcase )
110
+ else
111
+ attr . unlink unless attr_protocols . include? ( :relative )
112
+ end
113
+ end
114
+ else
115
+ # The attribute isn't whitelisted.
95
116
96
117
if allow_data_attributes && attr_name . start_with? ( 'data-' )
97
118
# Arbitrary data attributes are allowed. Verify that the attribute
@@ -104,28 +125,10 @@ def call(env)
104
125
end
105
126
end
106
127
end
107
-
108
- # Delete remaining attributes that use unacceptable protocols.
109
- if @protocols . has_key? ( name )
110
- protocol = @protocols [ name ]
111
-
112
- node . attribute_nodes . each do |attr |
113
- attr_name = attr . name . downcase
114
- next false unless protocol . has_key? ( attr_name )
115
-
116
- del = if attr . value . to_s . downcase =~ REGEX_PROTOCOL
117
- !protocol [ attr_name ] . include? ( $1. downcase )
118
- else
119
- !protocol [ attr_name ] . include? ( :relative )
120
- end
121
-
122
- attr . unlink if del
123
- end
124
- end
125
128
end
126
129
127
130
# Add required attributes.
128
- if @add_attributes . has_key ?( name )
131
+ if @add_attributes . include ?( name )
129
132
@add_attributes [ name ] . each { |key , val | node [ key ] = val }
130
133
end
131
134
end
0 commit comments