Commit 1a1f58e: Update settings.yaml

1 parent db34802 · commit 1a1f58e

File tree: 1 file changed


ragtest/settings.yaml: 135 additions & 68 deletions

@@ -1,78 +1,145 @@
-async_mode: threaded
-cache:
-  base_dir: cache
-  type: file
-chunks:
-  group_by_columns:
-  - id
-  overlap: 64
-  size: 512
-claim_extraction:
-  description: Any claims or facts that could be relevant to information discovery.
-  max_gleanings: 0
-  prompt: prompts/claim_extraction.txt
-cluster_graph:
-  max_cluster_size: 10
-community_report:
-  max_input_length: 8000
-  max_length: 2000
-  prompt: prompts/community_report.txt
-embed_graph:
-  enabled: false
+
+encoding_model: cl100k_base
+skip_workflows: []
+llm:
+  api_key: ${GRAPHRAG_API_KEY}
+  type: openai_chat # or azure_openai_chat
+  model: mistral-nemo:12b-instruct-2407-fp16
+  model_supports_json: true # recommended if this is available for your model.
+  max_tokens: 8192
+  # request_timeout: 180.0
+  api_base: http://localhost:11434/v1
+  # api_version: 2024-02-15-preview
+  # organization: <organization_id>
+  # deployment_name: <azure_model_deployment_name>
+  # tokens_per_minute: 150_000 # set a leaky bucket throttle
+  # requests_per_minute: 10_000 # set a leaky bucket throttle
+  max_retries: 3
+  # max_retry_wait: 10.0
+  # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
+  concurrent_requests: 25 # the number of parallel inflight requests that may be made
+
+parallelization:
+  stagger: 0.3
+  num_threads: 50 # the number of threads to use for parallel processing
+
+async_mode: threaded # or asyncio
+
 embeddings:
-  async_mode: threaded
+  ## parallelization: override the global parallelization settings for embeddings
+  async_mode: threaded # or asyncio
   llm:
-    api_base: http://localhost:11434/api
     api_key: ${GRAPHRAG_API_KEY}
-    concurrent_requests: 25
+    type: openai_embedding # or azure_openai_embedding
     model: nomic-embed-text:latest
-    model_supports_json: true
-    provider: openai_embedding
-    type: openai_embedding
-encoding_model: cl100k_base
-entity_extraction:
-  entity_types:
-  - organization
-  - person
-  - geo
-  - event
-  max_gleanings: 0
-  prompt: prompts/entity_extraction.txt
-global_search:
-  concurrency: 32
+    api_base: http://localhost:11434/api
+    # api_version: 2024-02-15-preview
+    # organization: <organization_id>
+    # deployment_name: <azure_model_deployment_name>
+    # tokens_per_minute: 150_000 # set a leaky bucket throttle
+    # requests_per_minute: 10_000 # set a leaky bucket throttle
+    max_retries: 3
+    # max_retry_wait: 10.0
+    # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
+    concurrent_requests: 25 # the number of parallel inflight requests that may be made
+    #batch_size: 1 # the number of documents to send in a single request
+    #batch_max_tokens: 4000 # the maximum number of tokens to send in a single request
+  # target: required # or optional
+
+
+
+chunks:
+  size: 512
+  overlap: 64
+  group_by_columns: [id] # by default, we don't allow chunks to cross documents
+
 input:
-  base_dir: input
+  type: file # or blob
+  file_type: text # or csv
+  base_dir: "input"
   file_encoding: utf-8
-  file_pattern: .*\.txt$
-  file_type: text
-  type: file
-llm:
-  api_base: http://localhost:11434/v1
-  api_key: ${GRAPHRAG_API_KEY}
-  concurrent_requests: 25
-  max_tokens: 1024
-  model: qwen2:7b
-  model_supports_json: true
-  provider: openai_chat
-  temperature: 0.5
-  type: openai
-local_search: null
-parallelization:
-  num_threads: 50
-  stagger: 0.3
-reporting:
-  base_dir: output/${timestamp}/reports
-  type: file
-skip_workflows: []
-snapshots:
-  graphml: true
-  raw_entities: true
-  top_level_nodes: true
+  file_pattern: ".*\\.txt$"
+
+cache:
+  type: file # or blob
+  base_dir: "cache"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
 storage:
-  base_dir: output/${timestamp}/artifacts
-  type: file
+  type: file # or blob
+  base_dir: "output/${timestamp}/artifacts"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
+reporting:
+  type: file # or console, blob
+  base_dir: "output/${timestamp}/reports"
+  # connection_string: <azure_blob_storage_connection_string>
+  # container_name: <azure_blob_storage_container_name>
+
+entity_extraction:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/entity_extraction.txt"
+  entity_types: [organization,person,geo,event]
+  max_gleanings: 0
+
 summarize_descriptions:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/summarize_descriptions.txt"
   max_length: 500
-  prompt: prompts/summarize_descriptions.txt
+
+claim_extraction:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  # enabled: true
+  prompt: "prompts/claim_extraction.txt"
+  description: "Any claims or facts that could be relevant to information discovery."
+  max_gleanings: 0
+
+community_reports:
+  ## llm: override the global llm settings for this task
+  ## parallelization: override the global parallelization settings for this task
+  ## async_mode: override the global async_mode settings for this task
+  prompt: "prompts/community_report.txt"
+  max_length: 2000
+  max_input_length: 4000
+
+cluster_graph:
+  max_cluster_size: 10
+
+embed_graph:
+  enabled: false # if true, will generate node2vec embeddings for nodes
+  # num_walks: 10
+  # walk_length: 40
+  # window_size: 2
+  # iterations: 3
+  # random_seed: 597832
+
 umap:
-  enabled: false
+  enabled: false # if true, will generate UMAP embeddings for nodes
+
+snapshots:
+  graphml: false
+  raw_entities: false
+  top_level_nodes: false
+
+local_search:
+  # text_unit_prop: 0.5
+  # community_prop: 0.1
+  # conversation_history_max_turns: 5
+  # top_k_mapped_entities: 10
+  # top_k_relationships: 10
+  # max_tokens: 12000
+
+global_search:
+  # max_tokens: 12000
+  # data_max_tokens: 12000
+  # map_max_tokens: 1000
+  # reduce_max_tokens: 2000
+  # concurrency: 32
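
The old file is a machine-dumped config (alphabetized keys, resolved values, no comments); the new one is the commented default settings template, with both models served by a local Ollama instance: the chat model mistral-nemo:12b-instruct-2407-fp16 via the OpenAI-compatible /v1 route, and the embedder nomic-embed-text:latest via the native /api route. The sketch below is a minimal pre-flight check for that asymmetry and is not part of the commit: it assumes the openai and requests Python packages, that both models are already pulled into Ollama, and that a dummy API key is acceptable (Ollama ignores the key).

import requests
from openai import OpenAI

# Chat endpoint: llm.api_base points at Ollama's OpenAI-compatible /v1 route.
chat = OpenAI(base_url="http://localhost:11434/v1", api_key="dummy")  # key is ignored by Ollama
reply = chat.chat.completions.create(
    model="mistral-nemo:12b-instruct-2407-fp16",
    messages=[{"role": "user", "content": "Reply with one word: ready"}],
    max_tokens=8,
)
print("chat ok:", reply.choices[0].message.content.strip())

# Embedding endpoint: embeddings.llm.api_base points at the native /api route,
# whose request body uses "prompt" rather than OpenAI's "input".
resp = requests.post(
    "http://localhost:11434/api/embeddings",
    json={"model": "nomic-embed-text:latest", "prompt": "ready"},
    timeout=30,
)
resp.raise_for_status()
print("embedding ok, dimensions:", len(resp.json()["embedding"]))

If the embedding call succeeds here but fails inside the pipeline, the /v1-versus-/api asymmetry is the first thing to check: an openai_embedding client speaks the OpenAI request schema, which the native /api route does not accept.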
