Skip to content

Commit 595abb7

Browse files
authored
Merge pull request #15 from severian42/dev
Enhance indexing and querying functionalities
2 parents df06e5a + 42f5cec commit 595abb7

File tree

98 files changed

+261
-186
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

98 files changed

+261
-186
lines changed

.DS_Store

0 Bytes
Binary file not shown.

app.py

Lines changed: 99 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,14 @@
2020
from ollama import chat
2121
import pyarrow.parquet as pq
2222
import pandas as pd
23+
import sys
2324

25+
# Add the project root to the Python path
26+
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
27+
sys.path.insert(0, project_root)
28+
29+
import gradio as gr
30+
from graphrag.query import cli
2431

2532
# Set up logging
2633
log_queue = queue.Queue()
@@ -80,14 +87,8 @@ def create_setting_component(key, value):
8087
outputs=[status]
8188
)
8289

83-
def run_command(command):
84-
try:
85-
result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
86-
return result.stdout
87-
except subprocess.CalledProcessError as e:
88-
return f"Error: {e.stderr}"
89-
90-
def index_graph(root_dir, progress=gr.Progress()):
90+
def index_graph(progress=gr.Progress()):
91+
root_dir = "./ragtest"
9192
command = f"python -m graphrag.index --root {root_dir}"
9293
logging.info(f"Running indexing command: {command}")
9394

@@ -129,10 +130,28 @@ def run_command_with_output():
129130
logging.info("Indexing completed")
130131
return "\n".join(full_output), update_logs()
131132

132-
def run_query(root_dir, method, query, history):
133-
command = f"python -m graphrag.query --root {root_dir} --method {method} \"{query}\""
134-
result = run_command(command)
135-
return result
133+
def run_query(root_dir, method, query, history, model, temperature, max_tokens):
134+
system_message = f"You are a helpful assistant performing a {method} search on the knowledge graph. Provide a concise and relevant answer based on the query."
135+
messages = [{"role": "system", "content": system_message}]
136+
for item in history:
137+
if isinstance(item, tuple) and len(item) == 2:
138+
human, ai = item
139+
messages.append({"role": "user", "content": human})
140+
messages.append({"role": "assistant", "content": ai})
141+
messages.append({"role": "user", "content": query})
142+
143+
try:
144+
response = chat(
145+
model=model,
146+
messages=messages,
147+
options={
148+
"temperature": temperature,
149+
"num_predict": max_tokens
150+
}
151+
)
152+
return response['message']['content']
153+
except Exception as e:
154+
return f"Error: {str(e)}"
136155

137156
def upload_file(file):
138157
if file is not None:
@@ -230,17 +249,28 @@ def manage_data():
230249
"input_files": input_files
231250
}
232251

252+
233253
def find_latest_graph_file(root_dir):
234254
pattern = os.path.join(root_dir, "output", "*", "artifacts", "*.graphml")
235255
graph_files = glob.glob(pattern)
256+
if not graph_files:
257+
# If no files found, try excluding .DS_Store
258+
output_dir = os.path.join(root_dir, "output")
259+
run_dirs = [d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d)) and d != ".DS_Store"]
260+
if run_dirs:
261+
latest_run = max(run_dirs)
262+
pattern = os.path.join(root_dir, "output", latest_run, "artifacts", "*.graphml")
263+
graph_files = glob.glob(pattern)
264+
236265
if not graph_files:
237266
return None
238267

239268
# Sort files by modification time, most recent first
240269
latest_file = max(graph_files, key=os.path.getmtime)
241270
return latest_file
242271

243-
def update_visualization(root_dir, folder_name, file_name):
272+
def update_visualization(folder_name, file_name):
273+
root_dir = "./ragtest"
244274
if not folder_name or not file_name:
245275
return None, "Please select a folder and a GraphML file."
246276
file_name = file_name.split("] ")[1] if "]" in file_name else file_name # Remove file type prefix
@@ -345,9 +375,13 @@ def update_logs():
345375

346376
def chat_with_llm(message, history, system_message, temperature, max_tokens, model):
347377
messages = [{"role": "system", "content": system_message}]
348-
for human, ai in history:
349-
messages.append({"role": "user", "content": human})
350-
messages.append({"role": "assistant", "content": ai})
378+
for item in history:
379+
if isinstance(item, tuple) and len(item) == 2:
380+
human, ai = item
381+
messages.append({"role": "user", "content": human})
382+
messages.append({"role": "assistant", "content": ai})
383+
elif isinstance(item, str):
384+
messages.append({"role": "user", "content": item})
351385
messages.append({"role": "user", "content": message})
352386

353387
try:
@@ -363,16 +397,17 @@ def chat_with_llm(message, history, system_message, temperature, max_tokens, mod
363397
except Exception as e:
364398
return f"Error: {str(e)}"
365399

366-
def send_message(root_dir, query_type, query, history, system_message, temperature, max_tokens, model):
367-
if query_type == "global":
368-
result = run_query(root_dir, "global", query, history)
369-
history.append((query, result))
370-
elif query_type == "local":
371-
result = run_query(root_dir, "local", query, history)
372-
history.append((query, result))
373-
else: # Direct chat
374-
result = chat_with_llm(query, history, system_message, temperature, max_tokens, model)
400+
def send_message(query_type, query, history, system_message, temperature, max_tokens, model):
401+
root_dir = "./ragtest"
402+
try:
403+
if query_type in ["global", "local"]:
404+
result = run_query(root_dir, query_type, query, history, model, temperature, max_tokens)
405+
else: # Direct chat
406+
result = chat_with_llm(query, history, system_message, temperature, max_tokens, model)
375407
history.append((query, result))
408+
except Exception as e:
409+
error_message = f"An error occurred: {str(e)}"
410+
history.append((query, error_message))
376411
return history, gr.update(value=""), update_logs()
377412

378413
def fetch_ollama_models():
@@ -659,16 +694,19 @@ def update_file_content(file_path):
659694
return f"Error reading file: {str(e)}"
660695

661696
def update_output_folder_list():
662-
folders = list_output_folders(root_dir.value)
697+
root_dir = "./ragtest"
698+
folders = list_output_folders(root_dir)
663699
return gr.update(choices=folders, value=folders[0] if folders else None)
664700

665-
def update_folder_content_list(root_dir, folder_name):
701+
def update_folder_content_list(folder_name):
702+
root_dir = "./ragtest"
666703
if not folder_name:
667704
return gr.update(choices=[])
668705
contents = list_folder_contents(os.path.join(root_dir, "output", folder_name))
669706
return gr.update(choices=contents)
670707

671-
def handle_content_selection(root_dir, folder_name, selected_item):
708+
def handle_content_selection(folder_name, selected_item):
709+
root_dir = "./ragtest"
672710
if isinstance(selected_item, list) and selected_item:
673711
selected_item = selected_item[0] # Take the first item if it's a list
674712

@@ -687,7 +725,8 @@ def handle_content_selection(root_dir, folder_name, selected_item):
687725
else:
688726
return gr.update(), "", ""
689727

690-
def initialize_selected_folder(root_dir, folder_name):
728+
def initialize_selected_folder(folder_name):
729+
root_dir = "./ragtest"
691730
if not folder_name:
692731
return "Please select a folder first.", gr.update(choices=[])
693732
folder_path = os.path.join(root_dir, "output", folder_name, "artifacts")
@@ -740,11 +779,10 @@ def list_folder_contents(folder_path):
740779
operation_status = gr.Textbox(label="Operation Status", visible=False)
741780

742781

743-
with gr.Accordion("Indexing", open=True):
744-
root_dir = gr.Textbox(label="Root Directory", value=os.path.abspath("./ragtest"))
745-
index_btn = gr.Button("Run Indexing", variant="primary")
746-
index_output = gr.Textbox(label="Indexing Output", lines=10, visible=True)
747-
index_progress = gr.Textbox(label="Indexing Progress", visible=True)
782+
with gr.Accordion("Indexing", open=False):
783+
index_btn = gr.Button("Run Indexing", variant="primary")
784+
index_output = gr.Textbox(label="Indexing Output", lines=10, visible=True)
785+
index_progress = gr.Textbox(label="Indexing Progress", visible=True)
748786

749787
with gr.TabItem("Indexing Outputs"):
750788
output_folder_list = gr.Dropdown(label="Select Output Folder", choices=[], interactive=True)
@@ -805,38 +843,53 @@ def list_folder_contents(folder_path):
805843
save_btn.click(fn=save_file_content, inputs=[file_list, file_content], outputs=[operation_status, log_output])
806844
index_btn.click(
807845
fn=index_graph,
808-
inputs=[root_dir],
809846
outputs=[index_output, log_output],
810847
show_progress=True
811848
)
812849
refresh_folder_btn.click(fn=update_output_folder_list, outputs=[output_folder_list]).then(
813850
fn=update_logs,
814851
outputs=[log_output]
815852
)
816-
output_folder_list.change(fn=update_folder_content_list, inputs=[root_dir, output_folder_list], outputs=[folder_content_list]).then(
853+
output_folder_list.change(
854+
fn=update_folder_content_list,
855+
inputs=[output_folder_list],
856+
outputs=[folder_content_list]
857+
).then(
817858
fn=update_logs,
818859
outputs=[log_output]
819860
)
820-
folder_content_list.change(fn=handle_content_selection, inputs=[root_dir, output_folder_list, folder_content_list], outputs=[folder_content_list, file_info, output_content]).then(
861+
folder_content_list.change(
862+
fn=handle_content_selection,
863+
inputs=[output_folder_list, folder_content_list],
864+
outputs=[folder_content_list, file_info, output_content]
865+
).then(
821866
fn=update_logs,
822867
outputs=[log_output]
823868
)
824-
initialize_folder_btn.click(fn=initialize_selected_folder, inputs=[root_dir, output_folder_list], outputs=[initialization_status, folder_content_list]).then(
869+
initialize_folder_btn.click(
870+
fn=initialize_selected_folder,
871+
inputs=[output_folder_list],
872+
outputs=[initialization_status, folder_content_list]
873+
).then(
825874
fn=update_logs,
826875
outputs=[log_output]
827876
)
828-
vis_btn.click(fn=update_visualization, inputs=[root_dir, output_folder_list, folder_content_list], outputs=[vis_output, vis_status]).then(
877+
vis_btn.click(
878+
fn=update_visualization,
879+
inputs=[output_folder_list, folder_content_list],
880+
outputs=[vis_output, vis_status]
881+
).then(
829882
fn=update_logs,
830883
outputs=[log_output]
831884
)
832885
query_btn.click(
833886
fn=send_message,
834-
inputs=[root_dir, query_type, query_input, chatbot, system_message, temperature, max_tokens, model],
887+
inputs=[query_type, query_input, chatbot, system_message, temperature, max_tokens, model],
835888
outputs=[chatbot, query_input, log_output]
836889
)
837890
query_input.submit(
838891
fn=send_message,
839-
inputs=[root_dir, query_type, query_input, chatbot, system_message, temperature, max_tokens, model],
892+
inputs=[query_type, query_input, chatbot, system_message, temperature, max_tokens, model],
840893
outputs=[chatbot, query_input, log_output]
841894
)
842895
refresh_models_btn.click(
@@ -866,6 +919,9 @@ def list_folder_contents(folder_path):
866919
document.addEventListener('DOMContentLoaded', addShiftEnterListener);
867920
""")
868921

922+
923+
demo = demo.queue()
924+
925+
869926
if __name__ == "__main__":
870-
demo.queue()
871-
demo.launch()
927+
demo.launch(share=True, reload=False)

graphrag/.DS_Store

0 Bytes
Binary file not shown.

graphrag/index/.DS_Store

0 Bytes
Binary file not shown.

graphrag/index/graph/.DS_Store

6 KB
Binary file not shown.
-3 Bytes
Binary file not shown.
Binary file not shown.
-3 Bytes
Binary file not shown.
-3 Bytes
Binary file not shown.

graphrag/index/verbs/.DS_Store

6 KB
Binary file not shown.

0 commit comments

Comments
 (0)