Skip to content

Commit 9957fbc

Browse files
authored
enhance: Update hello_hybrid_sparse_dense.py example to include BGE reranker (#2028)
Signed-off-by: Buqian Zheng <[email protected]>
1 parent 7809192 commit 9957fbc

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

examples/hello_hybrid_sparse_dense.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
# If true, use BGE-M3 model to generate dense and sparse vectors.
1515
# If false, use random numbers to compose dense and sparse vectors.
1616
use_bge_m3 = True
17+
# If both use_bge_m3 and use_reranker are true, the search results will be reranked
18+
# using BGE CrossEncoder model.
19+
use_reranker = True
1720

1821
# The overall steps are as follows:
1922
# 1. embed the text as dense and sparse vectors
@@ -113,12 +116,32 @@ def random_embedding(texts):
113116
# Currently Milvus only supports 1 query in the same hybrid search request, so
114117
# we inspect res[0] directly. In a future release Milvus will accept batch
115118
# hybrid search queries in the same call.
116-
for hit in res[0]:
117-
print(f'text: {hit.fields["text"]} distance {hit.distance}')
118-
119-
# If you are using BGE-M3 to generate the embedding, you should see the following:
119+
res = res[0]
120+
121+
if use_bge_m3 and use_reranker:
122+
result_texts = [hit.fields["text"] for hit in res]
123+
from pymilvus.model.reranker import BGERerankFunction
124+
bge_rf = BGERerankFunction(device='cpu')
125+
# rerank the results using BGE CrossEncoder model
126+
results = bge_rf(query, result_texts, top_k=2)
127+
for hit in results:
128+
print(f'text: {hit.text} distance {hit.score}')
129+
else:
130+
for hit in res:
131+
print(f'text: {hit.fields["text"]} distance {hit.distance}')
132+
133+
# If you used both BGE-M3 and the reranker, you should see the following:
134+
# text: Alan Turing was the first person to conduct substantial research in AI. distance 0.9306981017573297
135+
# text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.03217001154515051
136+
#
137+
# If you used only BGE-M3, you should see the following:
120138
# text: Alan Turing was the first person to conduct substantial research in AI. distance 0.032786883413791656
121139
# text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.016129031777381897
122140

141+
# In this simple example the reranker yields the same result as the embedding-based hybrid search, but in more complex
142+
# scenarios the reranker can provide more accurate results.
143+
144+
# If you used random vectors, the result will be different each time you run the script.
145+
123146
# Drop the collection to clean up the data.
124147
utility.drop_collection(col_name)

0 commit comments

Comments
 (0)