-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Add experimental monitor mode to BasicCrawler #2692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
41f1149
c7c6a8b
9f669e4
8d48c6a
b5e81cb
47e294f
d4ae373
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,7 @@ import type { OptionsInit, Method } from 'got-scraping'; | |
| import ow, { ArgumentError } from 'ow'; | ||
| import { getDomain } from 'tldts'; | ||
| import type { SetRequired } from 'type-fest'; | ||
| import { Monitor } from '@crawlee/core/src/monitor'; | ||
|
|
||
| export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> | ||
| extends CrawlingContext<BasicCrawler, UserData> { | ||
|
|
@@ -367,6 +368,13 @@ export interface CrawlerExperiments { | |
| * - set `requestLocking` to `false` in the `experiments` option of the crawler | ||
| */ | ||
| requestLocking?: boolean; | ||
| /** | ||
| * Experimental CLI output monitor mode. | ||
| * If you encounter issues due to this change, please: | ||
| * - report it to us: https://github.com/apify/crawlee | ||
| * - set `monitor` to `false` in the `experiments` option of the crawler | ||
|
||
| */ | ||
| monitor?: boolean; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -904,11 +912,15 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext | |
| this.events.on(EventType.MIGRATING, boundPauseOnMigration); | ||
| this.events.on(EventType.ABORTING, boundPauseOnMigration); | ||
|
|
||
| const monitor = this.experiments.monitor ? new Monitor(this.stats, this.log) : null; | ||
|
||
| monitor?.start(); | ||
|
|
||
| try { | ||
| await this.autoscaledPool!.run(); | ||
| } finally { | ||
| await this.teardown(); | ||
| await this.stats.stopCapturing(); | ||
| monitor?.stop(); | ||
|
|
||
| process.off('SIGINT', sigintHandler); | ||
| this.events.off(EventType.MIGRATING, boundPauseOnMigration); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| import { log as defaultLog, Log } from './log'; | ||
| import { Statistics } from './crawlers/statistics'; | ||
| import os from 'os'; | ||
|
|
||
| export class Monitor { | ||
| private log: Log; | ||
| private statistics: Statistics; | ||
| private intervalId: NodeJS.Timeout | null = null; | ||
|
|
||
| constructor(statistics: Statistics, log: Log = defaultLog) { | ||
| this.statistics = statistics; | ||
| this.log = log.child({ prefix: 'Monitor' }); | ||
| } | ||
|
|
||
| start(interval: number = 5000) { | ||
| this.intervalId = setInterval(() => { | ||
| this.display(); | ||
| }, interval); | ||
| } | ||
|
|
||
| stop() { | ||
| if (this.intervalId) { | ||
| clearInterval(this.intervalId); | ||
| this.intervalId = null; | ||
| } | ||
| } | ||
|
|
||
| private display() { | ||
| const stats = this.statistics.calculate(); | ||
| const now = new Date(); | ||
| const startTime = this.statistics.state.crawlerStartedAt; | ||
| const elapsedTime = now.getTime() - new Date(startTime!).getTime(); | ||
| const cpuLoad = os.loadavg()[0]; | ||
| const memLoad = (os.totalmem() - os.freemem()) / os.totalmem(); | ||
|
|
||
| this.log.info(` | ||
| Start: ${startTime} | ||
| Now: ${now} (running for ${elapsedTime / 1000}s) | ||
| Progress: ${this.statistics.state.requestsFinished} / ${stats.requestsTotal} (${(this.statistics.state.requestsFinished / stats.requestsTotal) * 100}%), failed: ${this.statistics.state.requestsFailed} (${(this.statistics.state.requestsFailed / stats.requestsTotal) * 100}%) | ||
| Remaining: ${this.estimateRemainingTime(stats)} (${stats.requestsFinishedPerMinute} req/min) | ||
| Sys. load: ${cpuLoad.toFixed(2)} / ${(memLoad * 100).toFixed(2)}% | ||
| Concurrencies: ${this.statistics.state.requestsRetries} | ||
| `); | ||
| } | ||
|
|
||
| private estimateRemainingTime(stats: ReturnType<Statistics['calculate']>) { | ||
| const remainingRequests = stats.requestsTotal - this.statistics.state.requestsFinished; | ||
| const avgDuration = stats.requestAvgFinishedDurationMillis; | ||
| return (remainingRequests * avgDuration) / 1000; | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.