@@ -408,22 +408,22 @@ export const createTokenSpeedCalculator = (
   }
   // if the chunk is the stop chunk, set as output finish
   if (inputStartAt && outputStartAt && chunk.type === 'usage') {
-    const totalOutputTokens =
-      chunk.data?.totalOutputTokens ??
-      (chunk.data?.outputTextTokens ?? 0) + (chunk.data?.outputImageTokens ?? 0);
-    const reasoningTokens = chunk.data?.outputReasoningTokens ?? 0;
-    const outputTokens =
-      (outputThinking ?? false)
-        ? totalOutputTokens
-        : Math.max(0, totalOutputTokens - reasoningTokens);
+    // TPS should always include all generated tokens (including reasoning tokens)
+    // because it measures generation speed, not just visible content
+    const outputTokens = chunk.data?.totalOutputTokens ?? 0;
     const now = Date.now();
     const elapsed = now - (enableStreaming ? outputStartAt : inputStartAt);
+    const duration = now - outputStartAt;
+    const latency = now - inputStartAt;
+    const ttft = outputStartAt - inputStartAt;
+    const tps = elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000;
+
     result.push({
       data: {
-        duration: now - outputStartAt,
-        latency: now - inputStartAt,
-        tps: elapsed === 0 ? undefined : (outputTokens / elapsed) * 1000,
-        ttft: outputStartAt - inputStartAt,
+        duration,
+        latency,
+        tps,
+        ttft,
       } as ModelSpeed,
       id: TOKEN_SPEED_CHUNK_ID,
       type: 'speed',
0 commit comments