|
14 | 14 | # If true, use BGE-M3 model to generate dense and sparse vectors.
|
15 | 15 | # If false, use random numbers to compose dense and sparse vectors.
|
16 | 16 | use_bge_m3 = True
|
| 17 | +# If both use_bge_m3 and use_reranker are true, the search results will be reranked |
| 18 | +# using the BGE CrossEncoder model. |
| 19 | +use_reranker = True |
17 | 20 |
|
18 | 21 | # The overall steps are as follows:
|
19 | 22 | # 1. embed the text as dense and sparse vectors
|
@@ -113,12 +116,32 @@ def random_embedding(texts):
|
113 | 116 | # Currently Milvus only supports 1 query per hybrid search request, so
|
114 | 117 | # we inspect res[0] directly. In a future release Milvus will accept batch
|
115 | 118 | # hybrid search queries in the same call.
|
116 |
| -for hit in res[0]: |
117 |
| - print(f'text: {hit.fields["text"]} distance {hit.distance}') |
118 |
| - |
119 |
| -# If you are using BGE-M3 to generate the embedding, you should see the following: |
| 119 | +res = res[0] |
| 120 | + |
| 121 | +if use_bge_m3 and use_reranker: |
| 122 | + result_texts = [hit.fields["text"] for hit in res] |
| 123 | + from pymilvus.model.reranker import BGERerankFunction |
| 124 | + bge_rf = BGERerankFunction(device='cpu') |
| 125 | + # rerank the results using BGE CrossEncoder model |
| 126 | + results = bge_rf(query, result_texts, top_k=2) |
| 127 | + for hit in results: |
| 128 | + print(f'text: {hit.text} distance {hit.score}') |
| 129 | +else: |
| 130 | + for hit in res: |
| 131 | + print(f'text: {hit.fields["text"]} distance {hit.distance}') |
| 132 | + |
| 133 | +# If you used both BGE-M3 and the reranker, you should see the following: |
| 134 | +# text: Alan Turing was the first person to conduct substantial research in AI. distance 0.9306981017573297 |
| 135 | +# text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.03217001154515051 |
| 136 | +# |
| 137 | +# If you used only BGE-M3, you should see the following: |
120 | 138 | # text: Alan Turing was the first person to conduct substantial research in AI. distance 0.032786883413791656
|
121 | 139 | # text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.016129031777381897
|
122 | 140 |
|
| 141 | +# In this simple example, the reranker returns the same texts in the same order as the embedding-based hybrid search |
| 142 | +# (only the scores differ), but in more complex scenarios the reranker can provide more accurate results. |
| 143 | + |
| 144 | +# If you used random vectors, the result will be different each time you run the script. |
| 145 | + |
123 | 146 | # Drop the collection to clean up the data.
|
124 | 147 | utility.drop_collection(col_name)
|
0 commit comments