Skip to content

Commit d1d49cf

Browse files
committed
feat: unescape any HTML entities
Any HTML entities in URL-decoded characters should be unescaped before performing the checks. $ curl localhost:3000 -so /dev/null -w "%{http_code}\n" -A X -d "body=%22autofocus%20onFocUs=%27%26%2397%3blert()%27" 403
1 parent e036517 commit d1d49cf

File tree

3 files changed

+38
-24
lines changed

3 files changed

+38
-24
lines changed

README.md

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -194,22 +194,22 @@ goos: linux
194194
goarch: amd64
195195
pkg: github.com/kitabisa/teler-waf
196196
cpu: 11th Gen Intel(R) Core(TM) i9-11900H @ 2.50GHz
197-
BenchmarkTelerDefaultOptions-4 4530 265197 ns/op 35710 B/op 1690 allocs/op
198-
BenchmarkTelerCommonWebAttackOnly-4 32484 35325 ns/op 5949 B/op 118 allocs/op
199-
BenchmarkTelerCVEOnly-4 6248 187397 ns/op 33402 B/op 1647 allocs/op
200-
BenchmarkTelerBadIPAddressOnly-4 20649 54890 ns/op 5974 B/op 86 allocs/op
201-
BenchmarkTelerBadReferrerOnly-4 48594 22629 ns/op 5548 B/op 87 allocs/op
202-
BenchmarkTelerBadCrawlerOnly-4 41832 26891 ns/op 5634 B/op 85 allocs/op
203-
BenchmarkTelerDirectoryBruteforceOnly-4 48087 22008 ns/op 5554 B/op 84 allocs/op
204-
BenchmarkTelerCustomRule-4 50428 21523 ns/op 5323 B/op 84 allocs/op
205-
BenchmarkTelerWithoutCommonWebAttack-4 5133 230608 ns/op 34619 B/op 1654 allocs/op
206-
BenchmarkTelerWithoutCVE-4 15229 75995 ns/op 7169 B/op 124 allocs/op
207-
BenchmarkTelerWithoutBadIPAddress-4 5677 211478 ns/op 34602 B/op 1685 allocs/op
208-
BenchmarkTelerWithoutBadReferrer-4 4875 240689 ns/op 35127 B/op 1684 allocs/op
209-
BenchmarkTelerWithoutBadCrawler-4 4922 238995 ns/op 35000 B/op 1686 allocs/op
210-
BenchmarkTelerWithoutDirectoryBruteforce-4 4894 242973 ns/op 35241 B/op 1687 allocs/op
197+
BenchmarkTelerDefaultOptions-4 4396 266918 ns/op 35944 B/op 1696 allocs/op
198+
BenchmarkTelerCommonWebAttackOnly-4 30795 35602 ns/op 5990 B/op 118 allocs/op
199+
BenchmarkTelerCVEOnly-4 6171 194193 ns/op 33533 B/op 1652 allocs/op
200+
BenchmarkTelerBadIPAddressOnly-4 20464 55957 ns/op 5986 B/op 86 allocs/op
201+
BenchmarkTelerBadReferrerOnly-4 48403 23128 ns/op 5551 B/op 87 allocs/op
202+
BenchmarkTelerBadCrawlerOnly-4 42002 27165 ns/op 5633 B/op 85 allocs/op
203+
BenchmarkTelerDirectoryBruteforceOnly-4 50103 23074 ns/op 5535 B/op 84 allocs/op
204+
BenchmarkTelerCustomRule-4 49483 22086 ns/op 5332 B/op 84 allocs/op
205+
BenchmarkTelerWithoutCommonWebAttack-4 5156 228950 ns/op 34683 B/op 1658 allocs/op
206+
BenchmarkTelerWithoutCVE-4 15295 76501 ns/op 7167 B/op 124 allocs/op
207+
BenchmarkTelerWithoutBadIPAddress-4 5484 216523 ns/op 34820 B/op 1691 allocs/op
208+
BenchmarkTelerWithoutBadReferrer-4 4894 240202 ns/op 35133 B/op 1689 allocs/op
209+
BenchmarkTelerWithoutBadCrawler-4 5012 239976 ns/op 34995 B/op 1691 allocs/op
210+
BenchmarkTelerWithoutDirectoryBruteforce-4 4736 247549 ns/op 35496 B/op 1693 allocs/op
211211
PASS
212-
ok github.com/kitabisa/teler-waf 23.207s
212+
ok github.com/kitabisa/teler-waf 23.660s
213213
```
214214

215215
> **Note**: Benchmark results may vary between runs and may not be consistent. These results were obtained when there were **>1.5k** CVE templates; the [teler-resources](https://github.com/kitabisa/teler-resources) dataset may have grown since then, which may impact the results.

analyze.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ func (t *Teler) checkCustomRules(r *http.Request) error {
110110
// Converts map of headers to RAW string
111111
headers := headersToRawString(r.Header)
112112

113-
// Decode the URL-encoded request URI of the URL
114-
uri := toURLDecode(r.URL.RequestURI())
113+
// Decode the URL-encoded request URI and unescape any HTML entities in it
114+
uri := stringDeUnescape(r.URL.RequestURI())
115115

116116
// Declare byte slice for request body.
117117
var body string
@@ -130,8 +130,8 @@ func (t *Teler) checkCustomRules(r *http.Request) error {
130130
body = buf.String()
131131
}
132132

133-
// Decode the URL-encoded of body
134-
body = toURLDecode(body)
133+
// Decode the URL-encoded body and unescape any HTML entities in it
134+
body = stringDeUnescape(body)
135135

136136
// Iterate over the Customs field of the Teler struct, which is a slice of custom rules
137137
for _, rule := range t.opt.Customs {
@@ -201,8 +201,8 @@ func (t *Teler) checkCustomRules(r *http.Request) error {
201201
// If a match is found, it returns an error indicating a common web attack has been detected.
202202
// If no match is found, it returns nil.
203203
func (t *Teler) checkCommonWebAttack(r *http.Request) error {
204-
// Decode the URL-encoded request URI of the URL
205-
uri := toURLDecode(r.URL.RequestURI())
204+
// Decode the URL-encoded request URI and unescape any HTML entities in it
205+
uri := stringDeUnescape(r.URL.RequestURI())
206206

207207
// Declare byte slice for request body.
208208
var body string
@@ -221,8 +221,8 @@ func (t *Teler) checkCommonWebAttack(r *http.Request) error {
221221
body = buf.String()
222222
}
223223

224-
// Decode the URL-encoded of body
225-
body = toURLDecode(body)
224+
// Decode the URL-encoded body and unescape any HTML entities in it
225+
body = stringDeUnescape(body)
226226

227227
// Iterate over the filters in the CommonWebAttack data stored in the t.threat.cwa.Filters field
228228
for _, filter := range t.threat.cwa.Filters {

utils.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package teler
22

33
import (
44
"fmt"
5+
"html"
56
"strings"
67

78
"net/http"
@@ -69,11 +70,24 @@ func headersToRawString(headers http.Header) string {
6970
return h.String()
7071
}
7172

72-
// toURLDecode decode URL-decoded characters string using mdurl package
73+
// unescapeHTML to unescapes any HTML entities, i.e. á"
74+
// unescapes to "á", as does "á" and "á".
75+
func unescapeHTML(s string) string {
76+
return html.UnescapeString(s)
77+
}
78+
79+
// toURLDecode decodes a URL-encoded string using mdurl
7380
func toURLDecode(s string) string {
7481
return mdurl.Decode(s)
7582
}
7683

84+
// stringDeUnescape decodes URL-encoded characters, and then
85+
// unescapes any HTML entities in the result
86+
func stringDeUnescape(s string) string {
87+
s = toURLDecode(s)
88+
return unescapeHTML(s)
89+
}
90+
7791
// isValidMethod checks whether the given request.Method is valid
7892
func isValidMethod(method request.Method) bool {
7993
switch method {

0 commit comments

Comments
 (0)