Skip to content

Commit 99ded4a

Browse files
authored
chore: sync some update (lobehub#9539)
* chore: remove legacy files
* 🔧 fix: correct and simplify tps calc
* 🔧 fix: remove unnecessary console log in createRuntime function
1 parent 582f6d1 commit 99ded4a

File tree

4 files changed

+12
-486
lines changed

4 files changed

+12
-486
lines changed

packages/model-runtime/src/core/RouterRuntime/createRuntime.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,6 @@ export const createRouterRuntime = ({
181181
for (const runtimeItem of runtimes) {
182182
const models = runtimeItem.models || [];
183183
if (models.includes(model)) {
184-
console.log(`get runtime ${runtimeItem.id} ${model}`);
185184
return runtimeItem.runtime;
186185
}
187186
}

packages/model-runtime/src/core/streams/protocol.ts

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -408,22 +408,22 @@ export const createTokenSpeedCalculator = (
408408
}
409409
// if the chunk is the stop chunk, set as output finish
410410
if (inputStartAt && outputStartAt && chunk.type === 'usage') {
411-
const totalOutputTokens =
412-
chunk.data?.totalOutputTokens ??
413-
(chunk.data?.outputTextTokens ?? 0) + (chunk.data?.outputImageTokens ?? 0);
414-
const reasoningTokens = chunk.data?.outputReasoningTokens ?? 0;
415-
const outputTokens =
416-
(outputThinking ?? false)
417-
? totalOutputTokens
418-
: Math.max(0, totalOutputTokens - reasoningTokens);
411+
// TPS should always include all generated tokens (including reasoning tokens)
412+
// because it measures generation speed, not just visible content
413+
const outputTokens = chunk.data?.totalOutputTokens ?? 0;
419414
const now = Date.now();
420415
const elapsed = now - (enableStreaming ? outputStartAt : inputStartAt);
416+
const duration = now - outputStartAt;
417+
const latency = now - inputStartAt;
418+
const ttft = outputStartAt - inputStartAt;
419+
const tps = elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000;
420+
421421
result.push({
422422
data: {
423-
duration: now - outputStartAt,
424-
latency: now - inputStartAt,
425-
tps: elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000,
426-
ttft: outputStartAt - inputStartAt,
423+
duration,
424+
latency,
425+
tps,
426+
ttft,
427427
} as ModelSpeed,
428428
id: TOKEN_SPEED_CHUNK_ID,
429429
type: 'speed',

packages/model-runtime/src/utils/usageConverter.test.ts

Lines changed: 0 additions & 351 deletions
This file was deleted.

0 commit comments

Comments
 (0)