Skip to content

Commit 0aecff0

Browse files
authored
logging: Add ability to deduplicate log messages (#48)
Motivating example is to not excessively log the same http 4xx code over and over from the same ip, as this doesn't add info and bloats our log volume. This requires some abuse of logrus to work, but we do this via a formatter which can either emit the original message, or the empty string to log no message (this is harmless). Later, a seperate goroutine will emit a new log that logs the original log and how many times it occurred. We always log the first time we see a given message, then start a timer. If we see the message again at least once in that time frame, we log at the end of the time frame how many times. Note this means we can say "repeated once", which is a little confusing but preferable to displaying the original message unchanged but time delayed, which can be very misleading as there's no indication of the time delay.
1 parent 8d1a16b commit 0aecff0

File tree

1 file changed

+130
-3
lines changed

1 file changed

+130
-3
lines changed

log/logging.go

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
import (
	"fmt"
	"os"
	"sort"
	"strings"
	"sync"
	"time"

	"golang.org/x/net/context"

	log "github.com/Sirupsen/logrus"
	"github.com/weaveworks/common/user"
)
1416

17+
const (
	// defaultDedupeInterval is the reporting interval used when
	// SetupDeduplication is called with a non-positive interval.
	defaultDedupeInterval = time.Minute
)
20+
1521
// Setup configures logging output to stderr, sets the log level and sets the formatter.
1622
func Setup(logLevel string) error {
1723
log.SetOutput(os.Stderr)
@@ -24,6 +30,39 @@ func Setup(logLevel string) error {
2430
return nil
2531
}
2632

33+
// SetupDeduplication should be performed after any other logging setup.
34+
// For all logs less severe or equal to the given log level (but still higher than the logger's configured log level),
35+
// these logs will be 'deduplicated'. What this means is that, excluding certain special fields like time, multiple
36+
// identical log entries will be grouped up and a summary message emitted.
37+
// For example, instead of:
38+
// 00:00:00 INFO User 123 did xyz
39+
// 00:00:10 INFO User 123 did xyz
40+
// 00:00:25 INFO User 123 did xyz
41+
// 00:00:55 INFO User 123 did xyz
42+
// you would get:
43+
// 00:00:00 INFO User 123 did xyz
44+
// 00:01:00 INFO Repeated 3 times: User 123 did xyz
45+
// The interval argument controls how long to wait for additional messages to arrive before reporting.
46+
// Increase it to deduplicate more aggressively, decrease it to lower latency from a log occurring to it appearing.
47+
// Set it to 0 to pick a sensible default value (recommended).
48+
// NOTE: For simplicity and efficiency, fields are considered 'equal' if and only if their string representations (%v) are equal.
49+
func SetupDeduplication(logLevel string, interval time.Duration) error {
50+
dedupeLevel, err := log.ParseLevel(logLevel)
51+
if err != nil {
52+
return fmt.Errorf("Error parsing log level: %v", err)
53+
}
54+
if interval <= 0 {
55+
interval = defaultDedupeInterval
56+
}
57+
58+
// We use a special Formatter to either format the log using the original formatter, or to return ""
59+
// so nothing will be written for that event. The repeated entries are later logged along with a field flag
60+
// that tells the formatter to ignore the message.
61+
stdLogger := log.StandardLogger()
62+
stdLogger.Formatter = newDedupeFormatter(stdLogger.Formatter, dedupeLevel, interval)
63+
return nil
64+
}
65+
2766
// textFormatter renders entries in this package's plain-text layout; see its
// Format method below.
type textFormatter struct{}
2867

2968
// Based off logrus.TextFormatter, which behaves completely
@@ -35,9 +74,7 @@ func (f *textFormatter) Format(entry *log.Entry) ([]byte, error) {
3574
timeStamp := entry.Time.Format("2006/01/02 15:04:05.000000")
3675
if len(entry.Data) > 0 {
3776
fmt.Fprintf(b, "%s: %s %-44s ", levelText, timeStamp, entry.Message)
38-
for k, v := range entry.Data {
39-
fmt.Fprintf(b, " %s=%v", k, v)
40-
}
77+
b.WriteString(fieldsToString(entry.Data))
4178
} else {
4279
// No padding when there's no fields
4380
fmt.Fprintf(b, "%s: %s %s", levelText, timeStamp, entry.Message)
@@ -55,3 +92,93 @@ func (f *textFormatter) Format(entry *log.Entry) ([]byte, error) {
5592
// With returns a log entry pre-populated with the user fields carried in the
// given context.
func With(ctx context.Context) *log.Entry {
	fields := user.LogFields(ctx)
	return log.WithFields(fields)
}
95+
96+
// entryCount pairs a copy of a log entry with the number of times it has been
// repeated since it was first logged.
type entryCount struct {
	entry log.Entry // copy of the first occurrence (the *log.Entry passed to Format is not valid afterwards)
	count int       // repeats observed after the first occurrence
}
100+
101+
// dedupeFormatter wraps another logrus Formatter and suppresses repeated
// identical entries at or below a given level, later emitting summary
// messages instead (see SetupDeduplication).
type dedupeFormatter struct {
	innerFormatter log.Formatter         // formatter used for entries that are actually written
	level          log.Level             // entries at this level or less severe are deduplicated
	interval       time.Duration         // how long repeats are accumulated before a summary is emitted
	seen           map[string]entryCount // message+fields key -> first entry and repeat count
	lock           sync.Mutex            // guards seen
}
108+
109+
func newDedupeFormatter(innerFormatter log.Formatter, level log.Level, interval time.Duration) *dedupeFormatter {
110+
return &dedupeFormatter{
111+
innerFormatter: innerFormatter,
112+
level: level,
113+
interval: interval,
114+
seen: map[string]entryCount{},
115+
}
116+
}
117+
118+
func (f *dedupeFormatter) Format(entry *log.Entry) ([]byte, error) {
119+
if f.shouldLog(entry) {
120+
b, err := f.innerFormatter.Format(entry)
121+
return b, err
122+
}
123+
return []byte{}, nil
124+
}
125+
126+
func (f *dedupeFormatter) shouldLog(entry *log.Entry) bool {
127+
if _, ok := entry.Data["deduplicated"]; ok {
128+
// ignore our own logs about deduped messages
129+
return true
130+
}
131+
if entry.Level < f.level {
132+
// ignore logs more severe than our level
133+
return true
134+
}
135+
key := fmt.Sprintf("%s %s", entry.Message, fieldsToString(entry.Data))
136+
f.lock.Lock()
137+
defer f.lock.Unlock()
138+
if ec, ok := f.seen[key]; ok {
139+
// already seen, increment count and do not log
140+
ec.count++
141+
f.seen[key] = ec
142+
return false
143+
}
144+
// New message, log it but add it to seen.
145+
// We need to copy because the pointer ceases to be valid after we return from Format
146+
f.seen[key] = entryCount{entry: *entry}
147+
go f.evictEntry(key) // queue to evict later
148+
return true
149+
}
150+
151+
// Wait for interval seconds then evict the entry and send the log
152+
func (f *dedupeFormatter) evictEntry(key string) {
153+
time.Sleep(f.interval)
154+
var ec entryCount
155+
func() {
156+
f.lock.Lock()
157+
defer f.lock.Unlock()
158+
ec = f.seen[key]
159+
delete(f.seen, key)
160+
}()
161+
if ec.count == 0 {
162+
return
163+
}
164+
entry := log.WithFields(ec.entry.Data).WithField("deduplicated", ec.count)
165+
message := fmt.Sprintf("Repeated %d times: %s", ec.count, ec.entry.Message)
166+
// There's no way to choose the log level dynamically, so we have to do this hack
167+
map[log.Level]func(args ...interface{}){
168+
log.PanicLevel: entry.Panic,
169+
log.FatalLevel: entry.Fatal,
170+
log.ErrorLevel: entry.Error,
171+
log.WarnLevel: entry.Warn,
172+
log.InfoLevel: entry.Info,
173+
log.DebugLevel: entry.Debug,
174+
}[ec.entry.Level](message)
175+
}
176+
177+
func fieldsToString(data log.Fields) string {
178+
parts := make([]string, 0, len(data))
179+
// traversal order here is arbitrary but stable, which is fine for our purposes
180+
for k, v := range data {
181+
parts = append(parts, fmt.Sprintf("%s=%v", k, v))
182+
}
183+
return strings.Join(parts, " ")
184+
}

0 commit comments

Comments
 (0)