Skip to content

Optimizes private key normalization #4224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 61 additions & 23 deletions pkg/detectors/privatekey/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,66 @@ import (
"strings"
)

func Normalize(in string) string {
in = strings.ReplaceAll(in, `"`, "")
in = strings.ReplaceAll(in, `'`, "")
in = strings.ReplaceAll(in, "\t", "")
in = strings.ReplaceAll(in, `\t`, "")
in = strings.ReplaceAll(in, `\\t`, "")
in = strings.ReplaceAll(in, `\n`, "\n")
in = strings.ReplaceAll(in, `\\r\\n`, "\n")
in = strings.ReplaceAll(in, `\r\n`, "\n")
in = strings.ReplaceAll(in, "\r\n", "\n")
in = strings.ReplaceAll(in, `\\r`, "\n")
in = strings.ReplaceAll(in, "\r", "\n")
in = strings.ReplaceAll(in, `\r`, "\n")
in = strings.ReplaceAll(in, `\\n`, "\n")
in = strings.ReplaceAll(in, `\n\n`, "\n")
in = strings.ReplaceAll(in, "\n\n", "\n")
in = strings.ReplaceAll(in, `\\`, "\n")

cleaned := strings.Builder{}
parts := strings.Split(in, "\n")
for _, line := range parts {
cleaned.WriteString(strings.TrimSpace(line) + "\n")
// escapeReplacer rewrites escaped and platform-specific whitespace sequences
// in a single left-to-right pass: every newline or carriage-return variant
// becomes "\n" and every tab variant is removed.
//
// strings.Replacer compares the old strings in argument order at each
// position, so ordering only matters where one pattern is a prefix of another:
// each CRLF form is listed before the matching CR form so that the pair is
// consumed as one unit. The double-escaped forms are listed explicitly so that
// no stray backslash is left in front of the replacement.
var escapeReplacer = strings.NewReplacer(
	`\\n`, "\n", // Double-escaped newlines
	`\n`, "\n", // Single-escaped newlines
	`\\r\\n`, "\n", // Double-escaped CRLF
	`\r\n`, "\n", // Escaped CRLF
	"\r\n", "\n", // Actual CRLF
	`\\r`, "\n", // Double-escaped CR
	`\r`, "\n", // Escaped CR
	"\r", "\n", // Actual CR
	`\\t`, "", // Double-escaped tabs
	`\t`, "", // Escaped tabs
	"\t", "", // Actual tabs
)

// Normalize prepares a raw private-key string for parsing.
//
// Normalize applies a best-effort cleanup to keys copied from environment
// variables, JSON blobs, or other text formats, with the intent that Go's
// crypto/x509 and crypto/ssh parsers can consume the result. It performs the
// following steps:
//
//  1. Trim leading and trailing whitespace.
//  2. Remove a single layer of matching surrounding single or double quotes.
//  3. Replace escaped or platform-specific newline/carriage-return sequences
//     with "\n" and remove tab sequences entirely.
//  4. Walk the text line by line, trim each line, drop empty lines, and
//     terminate every remaining line with "\n".
//
// The function never returns an error. If raw is already well-formed it is
// returned unchanged apart from a guaranteed trailing newline. Input that is
// empty or contains only whitespace yields the empty string.
func Normalize(raw string) string {
	raw = strings.TrimSpace(raw)

	// Remove one layer of surrounding quotes, but only when the opening and
	// closing quote characters match.
	if n := len(raw); n >= 2 && raw[0] == raw[n-1] && (raw[0] == '"' || raw[0] == '\'') {
		raw = raw[1 : n-1]
	}

	// Canonicalize escape sequences in one pass.
	raw = escapeReplacer.Replace(raw)

	// The output is at most one byte longer than the input: each kept line
	// trades its separator for a terminator, and the final line gains one.
	var result strings.Builder
	result.Grow(len(raw) + 1)

	// Scan for line breaks with strings.IndexByte instead of strings.Split:
	// Split would allocate a slice holding every line just to iterate it once,
	// which was raised in review as an avoidable source of memory use.
	for len(raw) > 0 {
		line := raw
		if i := strings.IndexByte(raw, '\n'); i >= 0 {
			line, raw = raw[:i], raw[i+1:]
		} else {
			raw = ""
		}

		// Normalize per-line whitespace and discard blank lines.
		if trimmed := strings.TrimSpace(line); trimmed != "" {
			result.WriteString(trimmed)
			result.WriteByte('\n')
		}
	}

	return result.String()
}
100 changes: 100 additions & 0 deletions pkg/detectors/privatekey/normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package privatekey

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

func TestNormalize(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "quoted_rsa_private_key",
input: `"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----"`,
expected: "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----\n",
},
{
name: "escaped_newlines_private_key",
input: "-----BEGIN PRIVATE KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7VJTUt9Us8cKB\\nUkJJKHhHVXvmLOHEaYKXlLFyFGZfJTZWlbNJOkYGKhLBBsGgVJGWZJGKzKwJ7c\\n-----END PRIVATE KEY-----",
expected: "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7VJTUt9Us8cKB\nUkJJKHhHVXvmLOHEaYKXlLFyFGZfJTZWlbNJOkYGKhLBBsGgVJGWZJGKzKwJ7c\n-----END PRIVATE KEY-----\n",
},
{
name: "windows_crlf_private_key",
input: "-----BEGIN RSA PRIVATE KEY-----\r\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\r\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\r\n-----END RSA PRIVATE KEY-----",
expected: "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----\n",
},
{
name: "tabs_and_quotes_private_key",
input: `"-----BEGIN PRIVATE KEY-----\t\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7VJTUt9Us8cKB\t\nUkJJKHhHVXvmLOHEaYKXlLFyFGZfJTZWlbNJOkYGKhLBBsGgVJGWZJGKzKwJ7c\t\n-----END PRIVATE KEY-----"`,
expected: "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7VJTUt9Us8cKB\nUkJJKHhHVXvmLOHEaYKXlLFyFGZfJTZWlbNJOkYGKhLBBsGgVJGWZJGKzKwJ7c\n-----END PRIVATE KEY-----\n",
},
{
name: "double_escaped_newlines",
input: "-----BEGIN RSA PRIVATE KEY-----\\\\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\\\\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\\\\n-----END RSA PRIVATE KEY-----",
expected: "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----\n",
},
{
name: "ec_private_key_with_mixed_formatting",
input: `'-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIK8X2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1ZoAoGCCqGSM49\nAwEHoUQDQgAE7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END EC PRIVATE KEY-----'`,
expected: "-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIK8X2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1ZoAoGCCqGSM49\nAwEHoUQDQgAE7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END EC PRIVATE KEY-----\n",
},
{
name: "openssh_private_key_with_spaces",
input: ` "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAFwAAAAdzc2gtcn\nNhAAAAAwEAAQAAAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END OPENSSH PRIVATE KEY-----" `,
expected: "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAFwAAAAdzc2gtcn\nNhAAAAAwEAAQAAAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END OPENSSH PRIVATE KEY-----\n",
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
assert.Equal(t, tc.expected, Normalize(tc.input))
})
}
}

// BenchmarkNormalize measures Normalize on several key encodings (quoted,
// escaped, CRLF, tab-laden, double-escaped, and a repeated larger input). Each
// sub-benchmark reports allocations and sets the processed byte count to the
// input length so throughput is reported alongside time per op.
func BenchmarkNormalize(b *testing.B) {
	benchmarks := []struct {
		name  string
		input string
	}{
		{
			name:  "quoted_rsa_key",
			input: `"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----"`,
		},
		{
			name:  "escaped_newlines_key",
			input: "-----BEGIN PRIVATE KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7VJTUt9Us8cKB\\nUkJJKHhHVXvmLOHEaYKXlLFyFGZfJTZWlbNJOkYGKhLBBsGgVJGWZJGKzKwJ7c\\n-----END PRIVATE KEY-----",
		},
		{
			name:  "windows_crlf_key",
			input: "-----BEGIN RSA PRIVATE KEY-----\r\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\r\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\r\n-----END RSA PRIVATE KEY-----",
		},
		{
			name:  "large_key_with_mixed_formatting",
			input: strings.Repeat(`"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\n-----END RSA PRIVATE KEY-----"\n`, 10),
		},
		{
			name:  "ec_key_with_tabs_and_quotes",
			input: `'-----BEGIN EC PRIVATE KEY-----\t\nMHcCAQEEIK8X2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1ZoAoGCCqGSM49\t\nAwEHoUQDQgAE7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\t\n-----END EC PRIVATE KEY-----'`,
		},
		{
			name:  "openssh_key_double_escaped",
			input: "-----BEGIN OPENSSH PRIVATE KEY-----\\\\nb3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAFwAAAAdzc2gtcn\\\\nNhAAAAAwEAAQAAAQEA7Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z2Z1Z\\\\n-----END OPENSSH PRIVATE KEY-----",
		},
	}

	for _, bm := range benchmarks {
		size := int64(len(bm.input))
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()
			b.SetBytes(size)
			for n := 0; n < b.N; n++ {
				_ = Normalize(bm.input)
			}
		})
	}
}
Loading