Skip to content

Commit 7a43138

Browse files
committed
Add HTML parsing to discover WordPress plugins
1 parent 78bd94a commit 7a43138

File tree

5 files changed

+246
-16
lines changed

5 files changed

+246
-16
lines changed

go.mod

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
module github.com/Chocapikk/wpprobe
22

3-
go 1.22
3+
go 1.23.0
44

5-
toolchain go1.22.12
5+
toolchain go1.24.3
66

77
require (
88
github.com/Masterminds/semver v1.5.0
@@ -11,7 +11,8 @@ require (
1111
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213
1212
github.com/schollz/progressbar/v3 v3.18.0
1313
github.com/spf13/cobra v1.8.1
14-
golang.org/x/text v0.21.0
14+
golang.org/x/net v0.40.0
15+
golang.org/x/text v0.25.0
1516
)
1617

1718
require (
@@ -25,6 +26,6 @@ require (
2526
github.com/muesli/termenv v0.15.2 // indirect
2627
github.com/rivo/uniseg v0.4.7 // indirect
2728
github.com/spf13/pflag v1.0.5 // indirect
28-
golang.org/x/sys v0.30.0 // indirect
29-
golang.org/x/term v0.28.0 // indirect
29+
golang.org/x/sys v0.33.0 // indirect
30+
golang.org/x/term v0.32.0 // indirect
3031
)

go.sum

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
4545
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
4646
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
4747
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
48+
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
49+
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
4850
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
49-
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
50-
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
51-
golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
52-
golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
53-
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
54-
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
51+
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
52+
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
53+
golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
54+
golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
55+
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
56+
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
5557
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
5658
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
5759
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

internal/scanner/html.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright (c) 2025 Valentin Lobstein (Chocapikk) <[email protected]>
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
// this software and associated documentation files (the "Software"), to deal in
5+
// the Software without restriction, including without limitation the rights to
6+
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
// the Software, and to permit persons to whom the Software is furnished to do so,
8+
// subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in all
11+
// copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
20+
package scanner
21+
22+
import (
	"fmt"
	"io"
	"slices"
	"strings"
	"time"

	"github.com/Chocapikk/wpprobe/internal/utils"
	"golang.org/x/net/html"
)
31+
32+
func discoverPluginsFromHTML(target string, headers []string) ([]string, error) {
33+
normalized := utils.NormalizeURL(target) + "/"
34+
35+
client := utils.NewHTTPClient(10*time.Second, headers)
36+
htmlContent, err := client.Get(normalized)
37+
if err != nil {
38+
return nil, fmt.Errorf("failed to fetch homepage %s: %w", normalized, err)
39+
}
40+
41+
slugsSet := make(map[string]struct{})
42+
if err := extractSlugsFromReader(strings.NewReader(htmlContent), slugsSet); err != nil {
43+
return nil, fmt.Errorf("failed to parse HTML %s: %w", normalized, err)
44+
}
45+
46+
var slugs []string
47+
for slug := range slugsSet {
48+
slugs = append(slugs, slug)
49+
}
50+
return slugs, nil
51+
}
52+
53+
func extractSlugsFromReader(r io.Reader, dest map[string]struct{}) error {
54+
z := html.NewTokenizer(r)
55+
56+
for {
57+
tt := z.Next()
58+
switch tt {
59+
case html.ErrorToken:
60+
if z.Err() == io.EOF {
61+
return nil
62+
}
63+
return z.Err()
64+
65+
case html.StartTagToken, html.SelfClosingTagToken:
66+
t := z.Token()
67+
for _, attr := range t.Attr {
68+
val := strings.TrimSpace(attr.Val)
69+
if val == "" {
70+
continue
71+
}
72+
if attr.Key == "href" || attr.Key == "src" {
73+
if idx := strings.Index(val, "/wp-content/plugins/"); idx != -1 {
74+
rest := val[idx+len("/wp-content/plugins/"):]
75+
parts := strings.SplitN(rest, "/", 2)
76+
if len(parts) > 0 && parts[0] != "" {
77+
dest[parts[0]] = struct{}{}
78+
}
79+
}
80+
}
81+
}
82+
}
83+
}
84+
}

internal/scanner/html_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// html_test.go
2+
// Tests for HTML‐based plugin discovery functions.
3+
4+
package scanner
5+
6+
import (
7+
"net/http"
8+
"net/http/httptest"
9+
"reflect"
10+
"sort"
11+
"strings"
12+
"testing"
13+
)
14+
15+
// sortedSlice returns a sorted copy of ss, leaving ss itself untouched so
// callers can compare slices without caring about their original order.
func sortedSlice(ss []string) []string {
	var out []string
	out = append(out, ss...)
	sort.Strings(out)
	return out
}
20+
21+
func TestExtractSlugsFromReader(t *testing.T) {
22+
tests := []struct {
23+
name string
24+
html string
25+
wantSlugs []string
26+
}{
27+
{
28+
name: "Single plugin in href",
29+
html: `<html><head>
30+
<link rel="stylesheet" href="https://example.com/wp-content/plugins/pluginA/style.css" />
31+
</head><body></body></html>`,
32+
wantSlugs: []string{"pluginA"},
33+
},
34+
{
35+
name: "Multiple plugins in href and src",
36+
html: `<html><body>
37+
<img src="/wp-content/plugins/pluginB/images/img.png" />
38+
<a href="http://foo/wp-content/plugins/pluginC/file.php"></a>
39+
</body></html>`,
40+
wantSlugs: []string{"pluginB", "pluginC"},
41+
},
42+
{
43+
name: "Duplicate slugs and nested paths",
44+
html: `<html><body>
45+
<script src="/wp-content/plugins/pluginA/js/app.js"></script>
46+
<link href="/wp-content/plugins/pluginA/css/app.css" rel="stylesheet">
47+
<img src="/some/other/path/pluginA/wp-content/plugins/pluginD/img.jpg">
48+
</body></html>`,
49+
wantSlugs: []string{"pluginA", "pluginD"},
50+
},
51+
{
52+
name: "No plugin references",
53+
html: `<html><body><p>No plugins here</p></body></html>`,
54+
wantSlugs: []string{},
55+
},
56+
{
57+
name: "Malformed attributes",
58+
html: `<html><body>
59+
<a href="wp-content/plugins//style.css"></a>
60+
<a href="/wp-content/plugins/"></a>
61+
</body></html>`,
62+
wantSlugs: []string{},
63+
},
64+
}
65+
66+
for _, tt := range tests {
67+
t.Run(tt.name, func(t *testing.T) {
68+
dest := make(map[string]struct{})
69+
err := extractSlugsFromReader(strings.NewReader(tt.html), dest)
70+
if err != nil {
71+
t.Fatalf("extractSlugsFromReader returned error: %v", err)
72+
}
73+
74+
var got []string
75+
for slug := range dest {
76+
got = append(got, slug)
77+
}
78+
got = sortedSlice(got)
79+
want := sortedSlice(tt.wantSlugs)
80+
81+
if !reflect.DeepEqual(got, want) {
82+
t.Errorf("extractSlugsFromReader = %v, want %v", got, want)
83+
}
84+
})
85+
}
86+
}
87+
88+
func TestDiscoverPluginsFromHTML(t *testing.T) {
89+
const sampleHTML = `<!DOCTYPE html>
90+
<html>
91+
<head>
92+
<link rel="stylesheet" href="/wp-content/plugins/pluginX/css/style.css">
93+
<script src="http://host/wp-content/plugins/pluginY/js/app.js"></script>
94+
</head>
95+
<body>
96+
<img src="/wp-content/plugins/pluginZ/images/pic.png" alt="image">
97+
<a href="/some/other/path"></a>
98+
</body>
99+
</html>`
100+
101+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
102+
w.WriteHeader(http.StatusOK)
103+
_, _ = w.Write([]byte(sampleHTML))
104+
}))
105+
defer ts.Close()
106+
107+
slugs, err := discoverPluginsFromHTML(ts.URL, nil)
108+
if err != nil {
109+
t.Fatalf("discoverPluginsFromHTML returned error: %v", err)
110+
}
111+
112+
got := sortedSlice(slugs)
113+
want := sortedSlice([]string{"pluginX", "pluginY", "pluginZ"})
114+
115+
if !reflect.DeepEqual(got, want) {
116+
t.Errorf("discoverPluginsFromHTML = %v, want %v", got, want)
117+
}
118+
}

internal/scanner/scan.go

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package scanner
2121

2222
import (
23+
"fmt"
2324
"math"
2425
"sync"
2526

@@ -100,6 +101,15 @@ func performStealthyScan(
100101
opts ScanOptions,
101102
progress *utils.ProgressManager,
102103
) ([]string, PluginDetectionResult) {
104+
if progress != nil && opts.File == "" {
105+
progress.SetMessage("🔎 Discovering plugins from HTML...")
106+
}
107+
108+
htmlSlugs, err := discoverPluginsFromHTML(target, opts.Headers)
109+
if err != nil {
110+
utils.DefaultLogger.Warning(fmt.Sprintf("HTML discovery failed on %s: %v", target, err))
111+
}
112+
103113
if progress != nil && opts.File == "" {
104114
progress.SetMessage("🔎 Scanning REST API endpoints...")
105115
}
@@ -116,14 +126,29 @@ func performStealthyScan(
116126
}
117127

118128
endpoints := FetchEndpoints(target, opts.Headers)
119-
if len(endpoints) == 0 {
120-
if opts.File == "" {
121-
utils.DefaultLogger.Warning("No REST endpoints found on " + target)
129+
130+
var result PluginDetectionResult
131+
if len(endpoints) > 0 {
132+
result = DetectPlugins(endpoints, endpointsData)
133+
} else {
134+
result = PluginDetectionResult{
135+
Plugins: make(map[string]*PluginData),
136+
Detected: nil,
137+
}
138+
}
139+
140+
for _, slug := range htmlSlugs {
141+
if _, exists := result.Plugins[slug]; !exists {
142+
result.Plugins[slug] = &PluginData{
143+
Score: 1,
144+
Confidence: 50.0,
145+
Ambiguous: false,
146+
Matches: nil,
147+
}
148+
result.Detected = append(result.Detected, slug)
122149
}
123-
return nil, PluginDetectionResult{}
124150
}
125151

126-
result := DetectPlugins(endpoints, endpointsData)
127152
if len(result.Detected) == 0 {
128153
return nil, result
129154
}

0 commit comments

Comments
 (0)