Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
11c211d
TextRank-fix
Gfreely Jun 27, 2025
4e3fa9b
feat(llm):TextRank fix
Gfreely Jun 30, 2025
a8313df
fix
Gfreely Jun 30, 2025
a4180ea
pylint bug fix
Gfreely Jun 30, 2025
98471a4
fix Potential issue
Gfreely Jul 10, 2025
750d338
fix default num
Gfreely Jul 10, 2025
29ddeb1
fix spilt
Gfreely Jul 10, 2025
d2e846c
fix bug
Gfreely Jul 16, 2025
9530dfb
Update keyword_extract.py
Gfreely Jul 16, 2025
f994411
support regular expression
Gfreely Jul 16, 2025
5c66bff
Update keyword_extract.py
Gfreely Jul 16, 2025
f305f6c
Update keyword_extract.py
Gfreely Jul 16, 2025
78f9356
fix language bug
Gfreely Jul 16, 2025
777589e
pylint fix
Gfreely Jul 16, 2025
2da9054
python-igraph version
Gfreely Jul 22, 2025
79383bf
Merge remote-tracking branch 'origin/main' into test
Gfreely Jul 22, 2025
9aae252
fix pyproject
Gfreely Jul 22, 2025
8b4884c
Update pyproject.toml
Gfreely Jul 22, 2025
0131563
mark todo
Gfreely Jul 22, 2025
6b6bfe5
merge main branch
Gfreely Aug 6, 2025
960481a
Update keyword_extract.py
Gfreely Aug 12, 2025
108caa5
Update textrank
Gfreely Aug 18, 2025
9790469
fix bug and gitignore
Gfreely Aug 18, 2025
5975c57
fix bug
Gfreely Aug 18, 2025
d6c54ae
fix bug
Gfreely Aug 18, 2025
02520ba
fix bug
Gfreely Aug 19, 2025
59ad7ed
update hybrid method
Gfreely Aug 21, 2025
1053060
fix bug
Gfreely Aug 21, 2025
a7c3543
fix bug
Gfreely Aug 21, 2025
236094e
Merge branch 'main' into TextRank-fix
imbajin Aug 21, 2025
ece0fc1
update new version
Gfreely Aug 29, 2025
3c3f7bb
Merge remote-tracking branch 'origin/TextRank-fix' into test
Gfreely Aug 29, 2025
b7f4136
fix bug
Gfreely Aug 29, 2025
38064c3
fix bug
Gfreely Aug 29, 2025
4379456
Update keyword_extract.py
Gfreely Sep 1, 2025
61f91de
update language
Gfreely Sep 8, 2025
27b048e
update language
Gfreely Sep 8, 2025
66c7ea8
Update graph_rag_task.py
Gfreely Sep 8, 2025
00edd28
Update word_extract.py
Gfreely Sep 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions hugegraph-llm/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ dependencies = [
"rich~=13.9.4",
"apscheduler~=3.10.4",
"litellm~=1.61.13",
"networkx~=3.4.2",
"scipy~=1.15.3",
"hugegraph-python-client",
]
[project.urls]
Expand Down
70 changes: 63 additions & 7 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
# pylint: disable=E1101

import os
from typing import AsyncGenerator, Tuple, Literal, Optional
from typing import AsyncGenerator, Literal, Optional, Tuple

import gradio as gr
import pandas as pd
from gradio.utils import NamedString

from hugegraph_llm.config import resource_path, prompt, huge_settings, llm_settings
from hugegraph_llm.config import huge_settings, llm_settings, prompt, resource_path
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
from hugegraph_llm.utils.decorators import with_task_id
from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize
from hugegraph_llm.utils.decorators import with_task_id
from hugegraph_llm.utils.log import log


def rag_answer(
text: str,
raw_answer: bool,
Expand All @@ -42,6 +42,11 @@ def rag_answer(
custom_related_information: str,
answer_prompt: str,
keywords_extract_prompt: str,
keywords_extract_method: str,
language: str,
mask_words: str,
max_keywords_num: int = 5,
window_size: int = 5,
gremlin_tmpl_num: Optional[int] = -1,
gremlin_prompt: Optional[str] = None,
max_graph_items=30,
Expand Down Expand Up @@ -75,7 +80,14 @@ def rag_answer(
if vector_search:
rag.query_vector_index()
if graph_search:
rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid(
rag.extract_keywords(
extract_template=keywords_extract_prompt,
language=language,
max_keywords=max_keywords_num,
extract_method=keywords_extract_method,
window_size=window_size,
mask_words=mask_words
).keywords_to_vid(
vector_dis_threshold=vector_dis_threshold,
topk_per_keyword=topk_per_keyword,
).import_schema(huge_settings.graph_name).query_graphdb(
Expand Down Expand Up @@ -157,6 +169,11 @@ async def rag_answer_streaming(
custom_related_information: str,
answer_prompt: str,
keywords_extract_prompt: str,
keywords_extract_method: str,
language: str,
mask_words: str,
max_keywords_num: int = 5,
window_size: int = 5,
gremlin_tmpl_num: Optional[int] = -1,
gremlin_prompt: Optional[str] = None,
) -> AsyncGenerator[Tuple[str, str, str, str], None]:
Expand Down Expand Up @@ -187,7 +204,14 @@ async def rag_answer_streaming(
if vector_search:
rag.query_vector_index()
if graph_search:
rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid().import_schema(
rag.extract_keywords(
extract_template=keywords_extract_prompt,
extract_method=keywords_extract_method,
language=language,
mask_words=mask_words,
max_keywords=max_keywords_num,
window_size=window_size
).keywords_to_vid().import_schema(
huge_settings.graph_name
).query_graphdb(
num_gremlin_generate_example=gremlin_tmpl_num,
Expand Down Expand Up @@ -261,7 +285,6 @@ def create_rag_block():
show_copy_button=True,
latex_delimiters=[{"left": "$", "right": "$", "display": False}],
)

answer_prompt_input = gr.Textbox(
value=prompt.answer_prompt, label="Query Prompt", show_copy_button=True, lines=7
)
Expand All @@ -271,6 +294,12 @@ def create_rag_block():
show_copy_button=True,
lines=7,
)
mask_words_input = gr.Textbox(
label="TextRank mask words",
info="""Enter any words you want to protect from being split during Chinese word segmentation(e.g., C++, website URLs). Separate each entry with a comma.""",
show_copy_button=True,
lines=7,
)

with gr.Column(scale=1):
with gr.Row():
Expand All @@ -279,6 +308,13 @@ def create_rag_block():
with gr.Row():
graph_only_radio = gr.Radio(choices=[True, False], value=True, label="Graph-only Answer")
graph_vector_radio = gr.Radio(choices=[True, False], value=False, label="Graph-Vector Answer")
with gr.Column():
with gr.Row():
extraction_method_input = gr.Radio(choices=["LLM", "TextRank"], value="TextRank", label="Keywords Extraction Method")
max_keyword_num = gr.Number(value=5, label="Max Keywords Num", precision=5)
with gr.Row():
language_input = gr.Radio(choices=["en", "zh"], value="en", label="Language")
sliding_window_size = gr.Slider(2, 10, 5, label="Sliding window size of TextRank", step=1, interactive=True)

def toggle_slider(enable):
return gr.update(interactive=enable)
Expand Down Expand Up @@ -322,6 +358,11 @@ def toggle_slider(enable):
custom_related_information,
answer_prompt_input,
keywords_extract_prompt_input,
extraction_method_input,
language_input,
mask_words_input,
max_keyword_num,
sliding_window_size,
example_num,
],
outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out],
Expand Down Expand Up @@ -387,6 +428,11 @@ def several_rag_answer(
custom_related_information_ui: str,
answer_prompt: str,
keywords_extract_prompt: str,
keywords_extraction_method: str,
language: str,
mask_words: str,
keyword_num: int,
window_size: int,
answer_max_line_count_ui: int = 1,
progress=gr.Progress(track_tqdm=True),
):
Expand All @@ -406,6 +452,11 @@ def several_rag_answer(
custom_related_information_ui,
answer_prompt,
keywords_extract_prompt,
keywords_extraction_method,
language,
mask_words,
keyword_num,
window_size,
)
df.at[index, "Basic LLM Answer"] = basic_llm_answer
df.at[index, "Vector-only Answer"] = vector_only_answer
Expand Down Expand Up @@ -439,6 +490,11 @@ def several_rag_answer(
custom_related_information,
answer_prompt_input,
keywords_extract_prompt_input,
extraction_method_input,
language_input,
mask_words_input,
max_keyword_num,
sliding_window_size,
answer_max_line_count,
],
outputs=[qa_dataframe, gr.File(label="Download Answered File", min_width=40)],
Expand Down
16 changes: 12 additions & 4 deletions hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
# under the License.


from typing import Dict, Any, Optional, List, Literal
from typing import Any, Dict, List, Literal, Optional

from hugegraph_llm.config import huge_settings, prompt
from hugegraph_llm.models.embeddings.base import BaseEmbedding
from hugegraph_llm.models.embeddings.init_embedding import Embeddings
from hugegraph_llm.models.llms.base import BaseLLM
Expand All @@ -31,8 +32,7 @@
from hugegraph_llm.operators.index_op.vector_index_query import VectorIndexQuery
from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize
from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract
from hugegraph_llm.utils.decorators import log_time, log_operator_time, record_rpm
from hugegraph_llm.config import prompt, huge_settings
from hugegraph_llm.utils.decorators import log_operator_time, log_time, record_rpm


class RAGPipeline:
Expand Down Expand Up @@ -69,7 +69,10 @@ def extract_keywords(
self,
text: Optional[str] = None,
max_keywords: int = 5,
language: str = "english",
language: str = "en",
extract_method: str = "TextRank",
window_size: int = 5,
mask_words: str = "",
extract_template: Optional[str] = None,
):
"""
Expand All @@ -79,6 +82,9 @@ def extract_keywords(
:param max_keywords: Maximum number of keywords to extract.
:param language: Language of the text.
:param extract_template: Template for keyword extraction.
:param extract_method: Method used for keyword extraction, e.g. "LLM" or "TextRank".
:param window_size: Sliding window size of TextRank.
:param mask_words: Comma-separated words to protect from being split during Chinese word segmentation in TextRank.
:return: Self-instance for chaining.
"""
self._operators.append(
Expand All @@ -87,6 +93,8 @@ def extract_keywords(
max_keywords=max_keywords,
language=language,
extract_template=extract_template,
extract_method=extract_method,
textrank_kwargs={"keyword_num": max_keywords, "window_size": window_size, "mask_words": mask_words},
)
)
return self
Expand Down
Loading
Loading