-
Notifications
You must be signed in to change notification settings - Fork 238
Node metrics #948
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Node metrics #948
Changes from 34 commits
Commits
Show all changes
35 commits
Select commit
Hold shift + click to select a range
362a390
Add metrics to relay.
cody-littley 3b4637e
Incremental progress.
cody-littley 60f015e
Incremental progress.
cody-littley 2d7e9ef
Incremental progress, need running averages.
cody-littley b8c7d35
Added running average metrics for GetChunks
cody-littley a6692c4
Merge branch 'master' into relay-metrics
cody-littley b9d71d6
Documentation
cody-littley 671f0c8
Add time window to metrics doc
cody-littley 4adb7ea
Added GetBlob metrics.
cody-littley 5579a88
Cleanup.
cody-littley 2b84f21
Cleanup test
cody-littley fb0cad5
Add locking for running average metric.
cody-littley a2c05cb
Merge branch 'master' into relay-metrics
cody-littley dfd2925
Add cache metrics.
cody-littley 24f5f5d
Fix test bug
cody-littley c3adb70
Made suggested change.
cody-littley 5c8c173
Added metrics for v2 DA node.
cody-littley 1795654
Added metrics documentation.
cody-littley 434c6b9
Merge branch 'master' into node-metrics
cody-littley 5c9274c
Revert deletions.
cody-littley 4d4bfe9
Remove documentation.
cody-littley d9d898c
Reimplement without metrics framework.
cody-littley 8bd8ff1
Cleanup.
cody-littley 2070eee
Stop background thread when metrics are stopped.
cody-littley cffa884
Revert unintentional change
cody-littley 5c511c9
Made suggested changes.
cody-littley a15117f
Don't start two metrics servers.
cody-littley 1076a8f
Fix compile issue.
cody-littley bbf9005
Merge branch 'master' into node-metrics
cody-littley 62ec4f6
Merge branch 'master' into node-metrics
cody-littley 143b798
Enable debug code.
cody-littley 168ded5
Debug
cody-littley dd21f61
Fix inabox bug.
cody-littley 1bb1404
Made suggested changes.
cody-littley 864e7d0
Made suggested changes.
cody-littley File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
package grpc | ||
|
||
import ( | ||
"github.com/Layr-Labs/eigensdk-go/logging" | ||
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/client_golang/prometheus/promauto" | ||
"google.golang.org/grpc" | ||
"os" | ||
"path/filepath" | ||
"sync/atomic" | ||
"time" | ||
) | ||
|
||
// namespace is the Prometheus namespace prepended to every metric name created in this file.
const namespace = "eigenda_node"
|
||
// MetricsV2 encapsulates metrics for the v2 DA node. | ||
type MetricsV2 struct { | ||
logger logging.Logger | ||
|
||
registry *prometheus.Registry | ||
grpcServerOption grpc.ServerOption | ||
|
||
storeChunksLatency *prometheus.SummaryVec | ||
storeChunksDataSize *prometheus.GaugeVec | ||
|
||
getChunksLatency *prometheus.SummaryVec | ||
getChunksDataSize *prometheus.GaugeVec | ||
|
||
dbSize *prometheus.GaugeVec | ||
dbSizePollPeriod time.Duration | ||
dbDir string | ||
isAlive *atomic.Bool | ||
} | ||
|
||
// NewV2Metrics creates a new MetricsV2 instance. dbSizePollPeriod is the period at which the database size is polled. | ||
// If set to 0, the database size is not polled. | ||
func NewV2Metrics( | ||
logger logging.Logger, | ||
registry *prometheus.Registry, | ||
dbDir string, | ||
dbSizePollPeriod time.Duration) (*MetricsV2, error) { | ||
|
||
// These should be re-enabled once the legacy v1 metrics are removed. | ||
//registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) | ||
//registry.MustRegister(collectors.NewGoCollector()) | ||
|
||
grpcMetrics := grpcprom.NewServerMetrics() | ||
registry.MustRegister(grpcMetrics) | ||
grpcServerOption := grpc.UnaryInterceptor( | ||
grpcMetrics.UnaryServerInterceptor(), | ||
) | ||
|
||
storeChunksLatency := promauto.With(registry).NewSummaryVec( | ||
cody-littley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
prometheus.SummaryOpts{ | ||
Namespace: namespace, | ||
Name: "store_chunks_latency_ms", | ||
Help: "The latency of a StoreChunks() RPC call.", | ||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, | ||
}, | ||
[]string{}, | ||
) | ||
|
||
storeChunksDataSize := promauto.With(registry).NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Namespace: namespace, | ||
Name: "store_chunks_data_size_bytes", | ||
Help: "The size of the data requested to be stored by StoreChunks() RPC calls.", | ||
}, | ||
[]string{}, | ||
) | ||
|
||
getChunksLatency := promauto.With(registry).NewSummaryVec( | ||
prometheus.SummaryOpts{ | ||
Namespace: namespace, | ||
Name: "get_chunks_latency_ms", | ||
Help: "The latency of a GetChunks() RPC call.", | ||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, | ||
}, | ||
[]string{}, | ||
) | ||
|
||
getChunksDataSize := promauto.With(registry).NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Namespace: namespace, | ||
Name: "get_chunks_data_size_bytes", | ||
Help: "The size of the data requested to be retrieved by GetChunks() RPC calls.", | ||
}, | ||
[]string{}, | ||
) | ||
|
||
dbSize := promauto.With(registry).NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Namespace: namespace, | ||
Name: "db_size_bytes", | ||
Help: "The size of the leveldb database.", | ||
}, | ||
[]string{}, | ||
) | ||
isAlive := &atomic.Bool{} | ||
isAlive.Store(true) | ||
|
||
return &MetricsV2{ | ||
logger: logger, | ||
registry: registry, | ||
grpcServerOption: grpcServerOption, | ||
storeChunksLatency: storeChunksLatency, | ||
storeChunksDataSize: storeChunksDataSize, | ||
getChunksLatency: getChunksLatency, | ||
getChunksDataSize: getChunksDataSize, | ||
dbSize: dbSize, | ||
dbSizePollPeriod: dbSizePollPeriod, | ||
dbDir: dbDir, | ||
isAlive: isAlive, | ||
}, nil | ||
} | ||
|
||
// Start starts the metrics server. | ||
func (m *MetricsV2) Start() { | ||
if m.dbSizePollPeriod.Nanoseconds() == 0 { | ||
return | ||
} | ||
go func() { | ||
ticker := time.NewTicker(m.dbSizePollPeriod) | ||
|
||
for m.isAlive.Load() { | ||
var size int64 | ||
err := filepath.Walk(m.dbDir, func(_ string, info os.FileInfo, err error) error { | ||
if err != nil { | ||
return err | ||
} | ||
if !info.IsDir() { | ||
size += info.Size() | ||
} | ||
return err | ||
}) | ||
|
||
if err != nil { | ||
m.logger.Errorf("failed to get database size: %v", err) | ||
} else { | ||
m.dbSize.WithLabelValues().Set(float64(size)) | ||
} | ||
<-ticker.C | ||
} | ||
}() | ||
|
||
} | ||
|
||
// Stop stops the metrics server. | ||
func (m *MetricsV2) Stop() { | ||
m.isAlive.Store(false) | ||
} | ||
|
||
// GetGRPCServerOption returns the gRPC server option that enables automatic GRPC metrics collection. | ||
func (m *MetricsV2) GetGRPCServerOption() grpc.ServerOption { | ||
return m.grpcServerOption | ||
} | ||
|
||
func (m *MetricsV2) ReportStoreChunksLatency(latency time.Duration) { | ||
m.storeChunksLatency.WithLabelValues().Observe( | ||
float64(latency.Nanoseconds()) / float64(time.Millisecond)) | ||
} | ||
|
||
func (m *MetricsV2) ReportStoreChunksDataSize(size uint64) { | ||
m.storeChunksDataSize.WithLabelValues().Set(float64(size)) | ||
} | ||
|
||
func (m *MetricsV2) ReportGetChunksLatency(latency time.Duration) { | ||
m.getChunksLatency.WithLabelValues().Observe( | ||
float64(latency.Nanoseconds()) / float64(time.Millisecond)) | ||
} | ||
|
||
func (m *MetricsV2) ReportGetChunksDataSize(size int) { | ||
m.getChunksDataSize.WithLabelValues().Set(float64(size)) | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.