src/ragas/integrations/llama_index.py — 12 additions & 4 deletions
@@ -1,9 +1,10 @@
from __future__ import annotations

import logging
+import math
import typing as t

-from ragas.dataset_schema import EvaluationDataset, SingleTurnSample
+from ragas.dataset_schema import EvaluationDataset, EvaluationResult, SingleTurnSample
from ragas.embeddings import LlamaIndexEmbeddingsWrapper
from ragas.evaluation import evaluate as ragas_evaluate
from ragas.executor import Executor
@@ -18,10 +19,10 @@
    BaseEmbedding as LlamaIndexEmbeddings,
)
from llama_index.core.base.llms.base import BaseLLM as LlamaindexLLM
+from llama_index.core.base.response.schema import Response as LlamaIndexResponse
from llama_index.core.workflow import Event

from ragas.cost import TokenUsageParser
-from ragas.evaluation import EvaluationResult


logger = logging.getLogger(__name__)
@@ -82,8 +83,15 @@ def evaluate(
    retrieved_contexts: t.List[t.List[str]] = []
    results = exec.results()
    for r in results:
-        responses.append(r.response)
-        retrieved_contexts.append([n.node.text for n in r.source_nodes])
+        # Handle failed jobs which are recorded as NaN in the executor
+        if isinstance(r, float) and math.isnan(r):
+            responses.append("")
Contributor review comment:
I think it's better to fail loudly than silently.
If we still need to pass through, better to keep None. The later metrics can skip None or handle them explicitly.
Suggested change:
    responses.append(None)
    retrieved_contexts.append(None)
    logger.warning(f"Query engine failed for query {i}: '{queries[i]}'")
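For illustration only, a minimal sketch of the behavior this comment argues for, pulled out as a standalone helper. collect_results and the raise_exceptions flag are hypothetical and not part of this PR; r.response, r.source_nodes, and n.get_text() are taken from the diff above.

import logging
import math
import typing as t

logger = logging.getLogger(__name__)


def collect_results(
    results: t.List[t.Any],
    queries: t.List[str],
    raise_exceptions: bool = True,  # hypothetical flag, not in the PR
) -> t.Tuple[t.List[t.Optional[str]], t.List[t.Optional[t.List[str]]]]:
    """Split query-engine results into responses and contexts, surfacing failures."""
    responses: t.List[t.Optional[str]] = []
    retrieved_contexts: t.List[t.Optional[t.List[str]]] = []
    for i, r in enumerate(results):
        # The executor records failed jobs as NaN floats.
        if isinstance(r, float) and math.isnan(r):
            if raise_exceptions:
                # Fail loudly: point at the query that failed instead of papering over it.
                raise RuntimeError(f"Query engine failed for query {i}: '{queries[i]}'")
            # Otherwise keep None so later metrics can skip or handle it explicitly.
            logger.warning(f"Query engine failed for query {i}: '{queries[i]}'")
            responses.append(None)
            retrieved_contexts.append(None)
            continue
        responses.append(r.response or "")
        retrieved_contexts.append([n.get_text() for n in r.source_nodes])
    return responses, retrieved_contexts

Raising keeps failures visible during an evaluation run, while the None path defers the decision to the metrics; that is the trade-off the comment describes.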

+            retrieved_contexts.append([])
+        else:
+            # Cast to LlamaIndex Response type for proper type checking
+            response = t.cast("LlamaIndexResponse", r)
Contributor review comment:
This'll be hard on type hints.
Probably better to take from llama_index.core.base.response.schema import Response as LlamaIndexResponse
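For illustration, a small sketch of the cast the comment points at, assuming the Response import added near the top of this file is available at runtime; as_llama_response is a hypothetical helper, not code from this PR.

import typing as t

from llama_index.core.base.response.schema import Response as LlamaIndexResponse


def as_llama_response(r: object) -> LlamaIndexResponse:
    # With the class imported, type checkers can resolve the cast target directly,
    # rather than treating "LlamaIndexResponse" as a string forward reference.
    return t.cast(LlamaIndexResponse, r)

If the import only lives under a TYPE_CHECKING block, the string form of the cast is the usual workaround, which may be why the diff keeps it.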

+            responses.append(response.response or "")
@anistark (Contributor), Oct 3, 2025:
Make this more explicit?
    responses.append(response.response if response.response is not None else "")

+            retrieved_contexts.append([n.get_text() for n in response.source_nodes])

    # append the extra information to the dataset
    for i, sample in enumerate(samples):