Skip to content

Commit a17dec4

Browse files
committed
gumbo: Make sure to use the char* pointer as the hashmap item
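Previously we passed the character string itself, which has variable length, as the hashmap item, but 8 bytes were always copied into the item. That is not really a problem when the string is shorter than 8 bytes, but it is more serious when the string is longer than 8 bytes. We now pass the address of the char* pointer as the item, and the compare and hash callbacks dereference it to reach the string. Fixes #3500 Fixes #3508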
1 parent a1fb9e4 commit a17dec4

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

gumbo-parser/src/string_set.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88

99
static int
1010
string_compare(const void *a, const void *b, void *udata) {
11-
return strcmp((const char *)a, (const char *)b);
11+
return strcmp(*(const char **)a, *(const char **)b);
1212
}
1313

1414
static uint64_t
1515
string_hash(const void *item, uint64_t seed0, uint64_t seed1) {
16-
const char *str = (const char *)item;
16+
const char *str = *(const char **)item;
1717
return hashmap_xxhash3(str, strlen(str), seed0, seed1);
1818
}
1919

@@ -31,11 +31,11 @@ void gumbo_string_set_free(GumboStringSet *set)
3131
void
3232
gumbo_string_set_insert(GumboStringSet *set, const char *str)
3333
{
34-
hashmap_set(set, str);
34+
hashmap_set(set, &str);
3535
}
3636

3737
int
3838
gumbo_string_set_contains(GumboStringSet *set, const char *str)
3939
{
40-
return hashmap_get(set, str) == NULL ? 0 : 1;
40+
return hashmap_get(set, &str) == NULL ? 0 : 1;
4141
}

test/html5/test_attributes.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,20 @@ def test_duplicate_attributes
2727
assert_equal(676, span.attributes.length, "duplicate attribute should be silently ignored")
2828
assert_equal("1", span["bb"], "bb attribute should hold the value of the first occurrence")
2929
end
30+
31+
# Using long attribute names (longer than 8 bytes) exercises the gumbo hashmap implementation.
32+
# See https://github.com/sparklemotion/nokogiri/issues/3500
33+
def test_duplicate_attributes_long
34+
html = +"<span "
35+
("abcdefghijklmnopqrst00".."abcdefghijklmnopqrst99").each do |attr|
36+
html << "#{attr}='1' "
37+
end
38+
("abcdefghijklmnopqrst00".."abcdefghijklmnopqrst99").each do |attr|
39+
html << "#{attr}='2' "
40+
end
41+
html << ">"
42+
span = Nokogiri::HTML5::DocumentFragment.parse(html, max_attributes: 1000).at_css("span")
43+
44+
assert_equal(100, span.attributes.length, "duplicate attribute should be silently ignored")
45+
end
3046
end if Nokogiri.uses_gumbo?

0 commit comments

Comments
 (0)