Skip to content

Commit 8bfd7cc

Browse files
authored
feat: support generating the groundtruth and evaluate on it (#68)
* feat: support generating the groundtruth and evaluate on it
  Signed-off-by: Keming <[email protected]>
* fix ci test
  Signed-off-by: Keming <[email protected]>
* add docs
  Signed-off-by: Keming <[email protected]>
* add example to docs
  Signed-off-by: Keming <[email protected]>
* update doc
  Signed-off-by: Keming <[email protected]>
---------
Signed-off-by: Keming <[email protected]>
1 parent 55385fb commit 8bfd7cc

File tree

15 files changed

+813
-438
lines changed

15 files changed

+813
-438
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ publish: build
2121
@uv publish
2222

2323
test:
24+
@uv sync --extra spacy --inexact
2425
@uv run -- pytest -v tests
2526

2627
sync:

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ The related Docker images can be found in [VectorChord Suite][vectorchord-suite]
4242
- [x] `Extractor` to extract the content from PDF, HTML, etc.
4343
- [x] `EntityRecognizer` to extract the entities and relations from the text
4444
- [x] `Reranker` for hybrid search
45+
- [x] `GroundTruth` to generate the ground truth for evaluation
4546

4647
## Examples
4748

docs/source/api.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@
4545
:show-inheritance:
4646
```
4747

48+
## Extract
49+
50+
```{eval-rst}
51+
.. automodule:: vechord.extract
52+
:members:
53+
:show-inheritance:
54+
```
55+
4856
## Evaluate
4957

5058
```{eval-rst}
@@ -53,10 +61,18 @@
5361
:show-inheritance:
5462
```
5563

56-
## Extract
64+
## GroundTruth
5765

5866
```{eval-rst}
59-
.. automodule:: vechord.extract
67+
.. automodule:: vechord.groundtruth
68+
:members: GroundTruth
69+
:show-inheritance:
70+
```
71+
72+
## Graph
73+
74+
```{eval-rst}
75+
.. automodule:: vechord.graph
6076
:members:
6177
:show-inheritance:
6278
```

docs/source/utils.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,18 @@ pip install vechord[openai,spacy,cohere]
2222
- {py:class}`~vechord.embedding.VoyageDenseEmbedding`: VoyageAI embedding
2323
- {py:class}`~vechord.embedding.SpacyDenseEmbedding`: Spacy embedding
2424
- Evaluate
25-
- {py:class}`~vechord.evaluate.GeminiEvaluator`: Gemini based evaluator
25+
- {py:class}`~vechord.evaluate.GeminiEvaluator`: Gemini based query generator
26+
- {py:class}`~vechord.evaluate.GeminiUMBRELAEvaluator`: Gemini UMBRELA evaluator
2627
- Extract
2728
- {py:class}`~vechord.extract.SimpleExtractor`: Simple extractor
2829
- {py:class}`~vechord.extract.GeminiExtractor`: Gemini extractor
30+
- {py:class}`~vechord.extract.LlamaParseExtractor`: LlamaParse extractor
31+
- Graph
32+
- {py:class}`~vechord.graph.SpacyEntityRecognizer`: Spacy based entity recognizer
33+
- {py:class}`~vechord.graph.GeminiEntityRecognizer`: Gemini based entity recognizer
34+
- GroundTruth
35+
- {py:class}`~vechord.groundtruth.GroundTruth`: generate ground truth
2936
- Rerank
3037
- {py:class}`~vechord.rerank.CohereReranker`: Cohere reranker
38+
- {py:class}`~vechord.rerank.JinaReranker`: Jina MultiModal reranker
3139
- {py:class}`~vechord.rerank.ReciprocalRankFusion`: fuse function for hybrid retrieval

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def anyio_backend():
2626
@pytest.fixture(name="registry")
async def fixture_registry(request):
    """Yield a ``VechordRegistry`` opened for the requesting test.

    The namespace is the name of the requesting test function. The ``tables``
    argument comes from the fixture parameter (``request.param``) when the
    test supplies one; otherwise an empty tuple is passed.
    """
    namespace = request.node.obj.__name__
    # `request.param` only exists when the fixture is parametrized, so a bare
    # attribute access fails for plain tests. The trailing `or ()` also
    # normalizes an explicit `None` (or otherwise empty) parameter.
    tables = getattr(request, "param", None) or ()
    async with VechordRegistry(namespace, TEST_POSTGRES, tables=tables) as registry:
        yield registry
3232

tests/test_groundtruth.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import uuid
2+
from unittest.mock import AsyncMock
3+
4+
import pytest
5+
6+
from vechord.client import set_namespace
7+
from vechord.groundtruth import GroundTruth
8+
from vechord.registry import VechordRegistry
9+
from vechord.spec import _DefaultChunk
10+
11+
pytestmark = pytest.mark.anyio
12+
13+
14+
@pytest.fixture(name="ground_truth_cleanup")
async def fixture_ground_truth_cleanup(request, registry: VechordRegistry):
    """Drop the ``test_query`` table after the requesting test has run."""
    test_namespace = request.node.obj.__name__
    # No setup is needed; hand control straight to the test.
    yield
    # Teardown: run the drop inside the namespace named after the test.
    async with set_namespace(test_namespace):
        await registry.client.drop("test_query")
21+
22+
23+
async def test_ground_truth(registry: VechordRegistry, ground_truth_cleanup):
    """Generate ground truth from mocked retrieval and expect an NDCG of 1.0."""
    queries = [
        "What is the largest mammal?",
        "What is the longest river in the world?",
        "What is the smallest bird?",
    ]

    async def fake_retrieve(query: str):
        # One chunk per query: its text is the query itself and its uid is
        # derived deterministically from the query string.
        chunk = _DefaultChunk(
            uid=uuid.uuid5(uuid.NAMESPACE_DNS, query),
            doc_id=None,
            text=query,
            vec=None,
            keyword=None,
        )
        return [chunk]

    async def fake_estimate(query: str, passage: str, chunk_type=None):
        # A passage identical to the query scores above the 2.0 threshold
        # configured below; anything else scores zero.
        if query == passage:
            return 1.0 + 2.0
        return 0.0

    retrieve = AsyncMock(side_effect=fake_retrieve)

    evaluator = AsyncMock()
    evaluator.estimate = fake_estimate
    evaluator.relevant_threshold = 2.0

    ground_truth = GroundTruth(name="test", vr=registry)
    await ground_truth.generate(queries, retrieve, evaluator)

    # Generation must have called retrieve exactly once per query.
    assert retrieve.call_count == len(queries)

    # Evaluate the same retriever against the ground truth just generated.
    metric = await ground_truth.evaluate(retrieve=retrieve)
    assert metric.ndcg == 1.0, metric

tests/test_run.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import inspect
2+
3+
import pytest
4+
5+
from vechord.client import set_namespace
6+
from vechord.model import ResourceRequest, RunIngestAck, RunRequest
7+
from vechord.pipeline import DynamicPipeline
8+
from vechord.registry import VechordRegistry
9+
from vechord.spec import DefaultDocument
10+
11+
pytestmark = pytest.mark.anyio
12+
13+
14+
@pytest.fixture(name="run_pipeline_cleanup")
async def fixture_run_pipeline_cleanup(request, registry: VechordRegistry):
    """Drop the ``defaultdocument`` and ``chunk`` tables after the test."""
    test_namespace = request.node.obj.__name__
    # No setup is needed; hand control straight to the test.
    yield
    # Teardown: drop both tables inside the namespace named after the test.
    async with set_namespace(test_namespace):
        await registry.client.drop("defaultdocument")
        await registry.client.drop("chunk")
22+
23+
24+
async def test_run_pipeline(registry: VechordRegistry, run_pipeline_cleanup):
    """Run a three-step dynamic pipeline and expect one stored document."""
    embedding_step = ResourceRequest(kind="text-emb", provider="spacy", args={})
    chunk_step = ResourceRequest(
        kind="chunk", provider="regex", args={"size": 128, "overlap": 0}
    )
    index_step = ResourceRequest(
        kind="index", provider="vectorchord", args={"vector": {}}
    )
    steps = [embedding_step, chunk_step, index_step]

    # The run is named after this test function.
    namespace = inspect.currentframe().f_code.co_name
    pipe = DynamicPipeline.from_steps(steps=steps)

    run_request = RunRequest(
        name=namespace, data="what to insert".encode(), steps=steps
    )
    ack: RunIngestAck = await pipe.run(run_request, vr=registry)
    assert ack.name == namespace
    assert ack.uid

    # The single ingested payload should be selectable as one document.
    docs = await registry.select_by(DefaultDocument.partial_init())
    assert len(docs) == 1

0 commit comments

Comments
 (0)