|
'use strict';

const path = require('node:path');
const { fork } = require('node:child_process');
const fs = require('node:fs');
const { styleText } = require('node:util');

// Calibration defaults; each can be overridden via a command-line option.
const DEFAULT_RUNS = 30; // Number of runs for each n value
const CV_THRESHOLD = 0.05; // 5% coefficient of variation threshold
const MAX_N_INCREASE = 6; // Maximum number of times to increase n (10**6)
const INCREASE_FACTOR = 10; // Factor by which to increase n
// With no CLI arguments there is nothing to calibrate: print usage and exit
// non-zero so wrapper scripts can detect the misuse.
const args = process.argv.slice(2);
if (args.length === 0) {
  console.log(`
Usage: node calibrate-n.js [options] <benchmark_path>

Options:
  --runs=N Number of runs for each n value (default: ${DEFAULT_RUNS})
  --cv-threshold=N Target coefficient of variation threshold (default: ${CV_THRESHOLD})
  --max-increases=N Maximum number of n increases to try (default: ${MAX_N_INCREASE})
  --start-n=N Initial n value to start with (default: autodetect)
  --increase=N Factor by which to increase n (default: ${INCREASE_FACTOR})

Example:
  node calibrate-n.js buffers/buffer-compare.js
  node calibrate-n.js --runs=10 --cv-threshold=0.02 buffers/buffer-compare.js
  `);
  process.exit(1);
}
| 31 | + |
// Extract options. Any argument that is not a recognized `--name=value`
// option is treated as the benchmark path (last one wins).
let benchmarkPath;
let runs = DEFAULT_RUNS;
let cvThreshold = CV_THRESHOLD;
let maxIncreases = MAX_N_INCREASE;
let startN = 10;
let increaseFactor = INCREASE_FACTOR;

// Return the value portion of a `--name=value` argument. Deriving the offset
// from the prefix itself avoids the hand-counted off-by-one offsets the
// previous version had (substring(14) for '--cv-threshold=' and
// substring(15) for '--max-increases=' both kept the '=' and always parsed
// to NaN, silently discarding the user's value).
const optionValue = (arg, prefix) => arg.substring(prefix.length);

for (const arg of args) {
  if (arg.startsWith('--runs=')) {
    runs = parseInt(optionValue(arg, '--runs='), 10);
    if (isNaN(runs) || runs <= 0) {
      console.error(`Error: Invalid value for --runs. Using default: ${DEFAULT_RUNS}`);
      runs = DEFAULT_RUNS;
    }
  } else if (arg.startsWith('--cv-threshold=')) {
    cvThreshold = parseFloat(optionValue(arg, '--cv-threshold='));
    if (isNaN(cvThreshold) || cvThreshold <= 0) {
      console.error(`Error: Invalid value for --cv-threshold. Using default: ${CV_THRESHOLD}`);
      cvThreshold = CV_THRESHOLD;
    }
  } else if (arg.startsWith('--max-increases=')) {
    maxIncreases = parseInt(optionValue(arg, '--max-increases='), 10);
    if (isNaN(maxIncreases)) {
      console.error(`Error: Invalid value for --max-increases. Using default: ${MAX_N_INCREASE}`);
      maxIncreases = MAX_N_INCREASE;
    }
  } else if (arg.startsWith('--start-n=')) {
    startN = parseInt(optionValue(arg, '--start-n='), 10);
    if (isNaN(startN)) {
      console.error(`Error: Invalid value for --start-n. Using default: 10`);
      startN = 10;
    }
  } else if (arg.startsWith('--increase=')) {
    increaseFactor = parseInt(optionValue(arg, '--increase='), 10);
    if (isNaN(increaseFactor)) {
      console.error(`Error: Invalid value for --increase. Using default: ${INCREASE_FACTOR}`);
      increaseFactor = INCREASE_FACTOR;
    }
  } else {
    benchmarkPath = arg;
  }
}
| 67 | + |
// Resolve and validate the benchmark file up front, before spending any
// time running it.
if (!benchmarkPath) {
  console.error('Error: No benchmark path specified');
  process.exit(1);
}

const fullBenchmarkPath = path.resolve(benchmarkPath);
try {
  // Throws if the path does not exist (equivalent existence check to
  // fs.existsSync, expressed as access-or-fail).
  fs.accessSync(fullBenchmarkPath);
} catch {
  console.error(`Error: Benchmark file not found: ${fullBenchmarkPath}`);
  process.exit(1);
}
| 78 | + |
/**
 * Compute dispersion statistics for a list of sampled rates.
 *
 * Uses the population variance (divides by N, not N-1), matching the
 * original implementation.
 *
 * @param {number[]} values - Non-empty list of samples.
 * @returns {{mean: number, stdDev: number, cv: number, variance: number}}
 *   cv is the coefficient of variation (stdDev / mean).
 */
function calculateStats(values) {
  let total = 0;
  for (const value of values) {
    total += value;
  }
  const mean = total / values.length;

  let sumSquaredDiff = 0;
  for (const value of values) {
    sumSquaredDiff += (value - mean) ** 2;
  }

  const variance = sumSquaredDiff / values.length;
  const stdDev = Math.sqrt(variance);

  return { mean, stdDev, cv: stdDev / mean, variance };
}
| 94 | + |
/**
 * Run the benchmark once as a child process with a fixed iteration count.
 *
 * The child reports results over the IPC channel as `report` messages; each
 * message carries a rate and the configuration it was measured under.
 *
 * @param {number} n - Iteration count passed to the benchmark as `n=<n>`.
 * @returns {Promise<Array<{rate: number, conf: object}>>} All reported
 *   results (one per configuration), resolved when the child exits cleanly.
 *   Rejects on a non-zero exit code or a spawn failure.
 */
function runBenchmark(n) {
  return new Promise((resolve, reject) => {
    const child = fork(
      fullBenchmarkPath,
      [`n=${n}`],
      { stdio: ['inherit', 'pipe', 'inherit', 'ipc'] },
    );

    const results = [];
    child.on('message', (data) => {
      if (data.type === 'report' && data.rate && data.conf) {
        results.push({
          rate: data.rate,
          conf: data.conf,
        });
      }
    });

    // FIX: without this handler a spawn failure (e.g. node binary missing,
    // EMFILE) emits 'error' and may never emit 'close', leaving the promise
    // pending forever and hanging the calibration loop.
    child.on('error', reject);

    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`Benchmark exited with code ${code}`));
      } else {
        resolve(results);
      }
    });
  });
}
| 122 | + |
/**
 * Calibrate the benchmark's iteration count.
 *
 * Repeatedly runs the benchmark `runs` times at the current n, groups the
 * reported rates by configuration, and computes the coefficient of variation
 * (CV) per group. If the average CV across groups is at or above the target
 * threshold, or any single group's CV exceeds the per-configuration limit,
 * n is multiplied by `increaseFactor` and the process repeats, up to
 * `maxIncreases` times.
 *
 * @param {number} [n=startN] - Initial iteration count to test.
 * @returns {Promise<number>} The first n meeting the stability criteria, or
 *   the best (lowest average CV) n seen if the criteria were never met.
 */
async function main(n = startN) {
  // Hard per-configuration ceiling: even when the average CV is acceptable,
  // no single configuration may be noisier than this.
  const PER_CONFIG_CV_LIMIT = 0.10;

  let increaseCount = 0;
  let bestN = n;
  let bestCV = Infinity;
  let bestGroupStats = null;

  console.log(`
--------------------------------------------------------
Benchmark: ${benchmarkPath}
--------------------------------------------------------
What we are trying to find: The optimal number of iterations (n)
that produces consistent benchmark results without wasting time.

How it works:
1. Run the benchmark multiple times with a specific n value
2. Group results by configuration
3. If overall CV is above ${cvThreshold * 100}% or any configuration has CV above ${PER_CONFIG_CV_LIMIT * 100}%, increase n and try again

Configuration:
- Starting n: ${n.toLocaleString()} iterations
- Runs per n value: ${runs}
- Target CV threshold: ${cvThreshold * 100}% (lower CV = more stable results)
- Max increases: ${maxIncreases}
- Increase factor: ${increaseFactor}x`);

  while (increaseCount < maxIncreases) {
    console.log(`\nTesting with n=${n}:`);

    // Collect one flat list of { rate, conf } reports across all runs.
    const resultsData = [];
    for (let i = 0; i < runs; i++) {
      const results = await runBenchmark(n);
      // Each run might return multiple results (one per configuration)
      if (Array.isArray(results) && results.length > 0) {
        resultsData.push(...results);
      } else if (results) {
        resultsData.push(results);
      }
      process.stdout.write('.');
    }
    process.stdout.write('\n');

    // Group the observed rates by benchmark configuration.
    const groupedResults = {};
    for (const result of resultsData) {
      if (!result || !result.conf) continue;

      const confKey = JSON.stringify(result.conf);
      groupedResults[confKey] ||= {
        conf: result.conf,
        rates: [],
      };
      groupedResults[confKey].rates.push(result.rate);
    }

    // Per-configuration stability report.
    const groupStats = [];
    for (const [confKey, group] of Object.entries(groupedResults)) {
      console.log(`\nConfiguration: ${JSON.stringify(group.conf)}`);

      const stats = calculateStats(group.rates);
      console.log(`  CV: ${(stats.cv * 100).toFixed(2)}% (lower values mean more stable results)`);

      const isStable = stats.cv <= cvThreshold;
      console.log(`  Stability: ${isStable ?
        styleText(['bold', 'green'], '✓ Stable') :
        styleText(['bold', 'red'], '✗ Unstable')}`);

      groupStats.push({ confKey, conf: group.conf, stats, isStable });
    }

    // Overall stability for this n. Computed once (the previous version
    // duplicated this calculation in two places).
    const avgCV = groupStats.length > 0 ?
      groupStats.reduce((sum, g) => sum + g.stats.cv, 0) / groupStats.length :
      Infinity;
    // FIX: compare against the (possibly user-overridden) cvThreshold
    // option; the previous version used the CV_THRESHOLD constant here,
    // silently ignoring --cv-threshold for the overall-stability decision.
    const isOverallStable = avgCV < cvThreshold;
    const tooUnstableConfigs = groupStats.filter((g) => g.stats.cv > PER_CONFIG_CV_LIMIT);
    const hasVeryUnstableConfigs = tooUnstableConfigs.length > 0;

    if (groupStats.length > 0) {
      console.log(`\nOverall average CV: ${(avgCV * 100).toFixed(2)}%`);

      if (isOverallStable && !hasVeryUnstableConfigs) {
        console.log(styleText(['bold', 'green'], `  ✓ Overall CV is below ${cvThreshold * 100}% and no configuration has CV above ${PER_CONFIG_CV_LIMIT * 100}%`));
      } else {
        if (!isOverallStable) {
          console.log(styleText(['bold', 'red'], `  ✗ Overall CV (${(avgCV * 100).toFixed(2)}%) is above ${cvThreshold * 100}%`));
        }
        if (hasVeryUnstableConfigs) {
          console.log(styleText(['bold', 'red'], `  ✗ ${tooUnstableConfigs.length} configuration(s) have CV above ${PER_CONFIG_CV_LIMIT * 100}%`));
        }
      }

      // Track the best (lowest average CV) n so it can be reported if the
      // target stability is never reached.
      if (avgCV < bestCV || !bestGroupStats) {
        bestN = n;
        bestCV = avgCV;

        bestGroupStats = [];
        for (const group of Object.values(groupedResults)) {
          // Require at least 3 samples for a meaningful CV estimate.
          if (group.rates.length >= 3) {
            const stats = calculateStats(group.rates);
            bestGroupStats.push({
              conf: group.conf,
              stats,
              // FIX: use the configured threshold; the previous version
              // hardcoded 0.10 here, inconsistent with groupStats above.
              isStable: stats.cv <= cvThreshold,
            });
          }
        }
        console.log(`  → New best n: ${n} with average CV: ${(avgCV * 100).toFixed(2)}%`);
      } else {
        console.log(`  → Current best n remains: ${bestN} with average CV: ${(bestCV * 100).toFixed(2)}%`);
      }
    }

    // Stop when both criteria hold:
    // 1. The overall average CV is below the target threshold
    // 2. No individual configuration exceeds the per-configuration limit
    if (isOverallStable && !hasVeryUnstableConfigs) {
      console.log(`\n✓ Found optimal n=${n} (Overall CV=${(avgCV * 100).toFixed(2)}% < ${cvThreshold * 100}% and no configuration has CV > ${PER_CONFIG_CV_LIMIT * 100}%)`);
      console.log('\nFinal CV for each configuration:');
      groupStats.forEach((g) => {
        console.log(`  ${JSON.stringify(g.conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
      });

      return n;
    }

    increaseCount++;
    n *= increaseFactor;
  }

  // All increases exhausted without meeting the stability criteria: report
  // the best n observed so the user still gets an actionable value.
  if (increaseCount >= maxIncreases) {
    const finalAvgCV = bestGroupStats && bestGroupStats.length > 0 ?
      bestGroupStats.reduce((sum, g) => sum + g.stats.cv, 0) / bestGroupStats.length :
      Infinity;

    console.log(`Maximum number of increases (${maxIncreases}) reached without achieving target stability`);
    console.log(`Best n found: ${bestN} with average CV=${(finalAvgCV * 100).toFixed(2)}%`);
    console.log(`\nCV by configuration at best n:`);

    if (bestGroupStats) {
      bestGroupStats.forEach((g) => {
        if (g.conf) {
          console.log(`  ${JSON.stringify(g.conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
          if (g.stats.cv > cvThreshold) {
            console.log(`    ⚠️ This configuration is above the target threshold of ${cvThreshold * 100}%`);
          }
        }
      });
    }
  }

  console.log(`
Recommendation: You might want to try increasing --max-increases to
continue testing with larger n values, or adjust --cv-threshold to
accept the current best result, or investigate if specific configurations
are contributing to instability.`);
  return bestN;
}
| 288 | + |
// Entry point: run the calibration and abort with a non-zero exit code on
// any unhandled failure.
(async () => {
  try {
    await main();
  } catch (err) {
    console.error('Error:', err);
    process.exit(1);
  }
})();
0 commit comments