
Commit 0da115c

Merge pull request #513 from funstory-ai/dev
prompt: Optimize prompt structure to improve prefix cache hit rate
2 parents 1f3e2f4 + e577e78 commit 0da115c
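
Providers with prefix caching, such as DeepSeek, which reports cache reuse through usage.prompt_cache_hit_tokens, can only reuse the longest run of tokens that is identical from the very first token of the prompt. Until now the per-batch material (context hints, glossary tables) was emitted before the fixed translation rules, so consecutive requests diverged almost immediately. This commit moves the static rules ahead of the dynamic hints and adds a cache_hit_prompt_token_count counter so the effect shows up in the logs. A minimal sketch of the idea (names are hypothetical, not BabelDOC's API):

```python
# A prefix cache matches requests token-by-token from the first token,
# so invariant instructions must precede per-request material.
STATIC_RULES = (
    "You are a translation engine.\n"
    "1. Do not translate style tags.\n"
    "2. Do not translate formula placeholders.\n"
)


def build_prompt(dynamic_hints: str, text: str) -> str:
    # Static rules first: every request shares one cacheable prefix.
    # Glossaries and the source text vary per batch, so they go last,
    # where a cache miss is unavoidable anyway.
    return STATIC_RULES + dynamic_hints + text
```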

File tree

9 files changed: +138 −108 lines


babeldoc/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__version__ = "0.5.14"
+__version__ = "0.5.15"

babeldoc/const.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 import threading
 from pathlib import Path

-__version__ = "0.5.14"
+__version__ = "0.5.15"

 CACHE_FOLDER = Path.home() / ".cache" / "babeldoc"

babeldoc/format/pdf/document_il/midend/automatic_term_extractor.py

Lines changed: 16 additions & 5 deletions
@@ -138,18 +138,24 @@ def calc_token_count(self, text: str) -> int:
         except Exception:
             return 0

-    def _snapshot_token_usage(self) -> tuple[int, int, int]:
+    def _snapshot_token_usage(self) -> tuple[int, int, int, int]:
         if not self.translate_engine:
-            return 0, 0, 0
+            return 0, 0, 0, 0
         token_counter = getattr(self.translate_engine, "token_count", None)
         prompt_counter = getattr(self.translate_engine, "prompt_token_count", None)
         completion_counter = getattr(
             self.translate_engine, "completion_token_count", None
         )
+        cache_hit_prompt_counter = getattr(
+            self.translate_engine, "cache_hit_prompt_token_count", None
+        )
         total_tokens = token_counter.value if token_counter else 0
         prompt_tokens = prompt_counter.value if prompt_counter else 0
         completion_tokens = completion_counter.value if completion_counter else 0
-        return total_tokens, prompt_tokens, completion_tokens
+        cache_hit_prompt_tokens = (
+            cache_hit_prompt_counter.value if cache_hit_prompt_counter else 0
+        )
+        return total_tokens, prompt_tokens, completion_tokens, cache_hit_prompt_tokens

     def _clean_json_output(self, llm_output: str) -> str:
         llm_output = llm_output.strip()

@@ -327,7 +333,9 @@ def extract_terms_from_paragraphs(

     def procress(self, doc_il: ILDocument):
         logger.info(f"{self.stage_name}: Starting term extraction for document.")
-        start_total, start_prompt, start_completion = self._snapshot_token_usage()
+        start_total, start_prompt, start_completion, start_cache_hit_prompt = (
+            self._snapshot_token_usage()
+        )
         tracker = DocumentTermExtractTracker()
         total = sum(len(page.pdf_paragraph) for page in doc_il.page)
         with self.translation_config.progress_monitor.stage_start(

@@ -341,11 +349,14 @@ def procress(self, doc_il: ILDocument):
                 self.process_page(page, executor, pbar, tracker.new_page())

         self.shared_context.finalize_auto_extracted_glossary()
-        end_total, end_prompt, end_completion = self._snapshot_token_usage()
+        end_total, end_prompt, end_completion, end_cache_hit_prompt = (
+            self._snapshot_token_usage()
+        )
         self.translation_config.record_term_extraction_usage(
             end_total - start_total,
             end_prompt - start_prompt,
             end_completion - start_completion,
+            end_cache_hit_prompt - start_cache_hit_prompt,
         )

         if self.translation_config.debug:
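
The hunks above snapshot every counter before the extraction run and again after it, then record only the deltas. A self-contained sketch of that accounting pattern (Counter stands in for BabelDOC's AtomicInteger; the numbers are invented):

```python
class Counter:
    """Stand-in for BabelDOC's AtomicInteger."""

    def __init__(self) -> None:
        self.value = 0

    def inc(self, n: int) -> None:
        self.value += n


total, prompt, completion, cache_hit = (Counter() for _ in range(4))


def snapshot() -> tuple[int, int, int, int]:
    # Read all counters at one point in time so the deltas stay consistent.
    return total.value, prompt.value, completion.value, cache_hit.value


start = snapshot()
prompt.inc(1200)     # simulated term-extraction traffic
completion.inc(80)
cache_hit.inc(1024)  # the portion of the prompt served from the cache
end = snapshot()
print([e - s for e, s in zip(end, start)])  # [0, 1200, 80, 1024]
```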

babeldoc/format/pdf/document_il/midend/il_translator.py

Lines changed: 32 additions & 33 deletions
@@ -890,6 +890,38 @@ def generate_prompt_for_llm(
             f"You are a professional and reliable machine translation engine responsible for translating the input text into {self.translation_config.lang_out}."
         ]

+        llm_input.append("When translating, please follow the following rules:")
+
+        rich_text_left_placeholder = (
+            self.translate_engine.get_rich_text_left_placeholder(1)
+        )
+        if isinstance(rich_text_left_placeholder, tuple):
+            rich_text_left_placeholder = rich_text_left_placeholder[0]
+        rich_text_right_placeholder = (
+            self.translate_engine.get_rich_text_right_placeholder(2)
+        )
+        if isinstance(rich_text_right_placeholder, tuple):
+            rich_text_right_placeholder = rich_text_right_placeholder[0]
+
+        # Create a structured prompt template for LLM translation
+        llm_input.append(
+            f'1. Do not translate style tags, such as "{rich_text_left_placeholder}xxx{rich_text_right_placeholder}"!'
+        )
+
+        formula_placeholder = self.translate_engine.get_formular_placeholder(3)
+        if isinstance(formula_placeholder, tuple):
+            formula_placeholder = formula_placeholder[0]
+
+        llm_input.append(
+            f'2. Do not translate formula placeholders, such as "{formula_placeholder}". The system will automatically replace the placeholders with the corresponding formulas.'
+        )
+        llm_input.append(
+            "3. If there is no need to translate (such as proper nouns, codes, etc.), then return the original text."
+        )
+        llm_input.append(
+            f"4. Only output the translation result in {self.translation_config.lang_out} without explanations and annotations."
+        )
+
         llm_context_hints = []

         if title_paragraph:

@@ -952,41 +984,8 @@ def generate_prompt_for_llm(
             for md_block in active_glossary_markdown_blocks:
                 llm_input.append(f"\n{md_block}\n")

-        llm_input.append("When translating, please follow the following rules:")
-
-        rich_text_left_placeholder = (
-            self.translate_engine.get_rich_text_left_placeholder(1)
-        )
-        if isinstance(rich_text_left_placeholder, tuple):
-            rich_text_left_placeholder = rich_text_left_placeholder[0]
-        rich_text_right_placeholder = (
-            self.translate_engine.get_rich_text_right_placeholder(2)
-        )
-        if isinstance(rich_text_right_placeholder, tuple):
-            rich_text_right_placeholder = rich_text_right_placeholder[0]
-
-        # Create a structured prompt template for LLM translation
-        llm_input.append(
-            f'1. Do not translate style tags, such as "{rich_text_left_placeholder}xxx{rich_text_right_placeholder}"!'
-        )
-
-        formula_placeholder = self.translate_engine.get_formular_placeholder(3)
-        if isinstance(formula_placeholder, tuple):
-            formula_placeholder = formula_placeholder[0]
-
-        llm_input.append(
-            f'2. Do not translate formula placeholders, such as "{formula_placeholder}". The system will automatically replace the placeholders with the corresponding formulas.'
-        )
-        llm_input.append(
-            "3. If there is no need to translate (such as proper nouns, codes, etc.), then return the original text."
-        )
-        llm_input.append(
-            f"4. Only output the translation result in {self.translation_config.lang_out} without explanations and annotations."
-        )
-        llm_input.append(f"5. Translate text into {self.translation_config.lang_out}.")
         prompt_template = f"""
 Now, please carefully read the following text to be translated and directly output your translation.\n\n{text}
-
 """
         llm_input.append(prompt_template)
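
The net effect of the two hunks: the numbered rules, previously emitted after the per-batch glossary tables, now sit directly behind the fixed system line, so consecutive requests share a much longer identical prefix (the redundant rule 5 is dropped outright, presumably because rule 4 already fixes the output language). A self-contained demonstration, using character counts as a rough proxy for tokens; none of these strings come from the repo:

```python
import os


def shared_prefix_len(a: str, b: str) -> int:
    # Length of the identical leading run -- a rough stand-in for the
    # token prefix a provider-side cache can reuse.
    return len(os.path.commonprefix([a, b]))


RULES = (
    "1. Do not translate style tags.\n"
    "2. Do not translate formula placeholders.\n"
    "3. Return the original text when no translation is needed.\n"
)
glossary_a = "### Glossary\n| GPU | ... |\n"  # varies per batch
glossary_b = "### Glossary\n| CPU | ... |\n"

# Old order: per-batch glossary first, so prefixes diverge at "GPU"/"CPU".
old = shared_prefix_len(glossary_a + RULES, glossary_b + RULES)
# New order: static rules first, so the whole rules block is shared too.
new = shared_prefix_len(RULES + glossary_a, RULES + glossary_b)
print(old, new)  # the new order shares len(RULES) more characters
```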

babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py

Lines changed: 63 additions & 63 deletions
@@ -643,69 +643,6 @@ def translate_paragraph(
             "When translating, strictly follow the instructions below to ensure translation quality and preserve all formatting, tags, and placeholders:\n"
         )

-        # 2. ##Contextual Hints for Better Translation
-        contextual_hints_section: list[str] = []
-        hint_idx = 1
-        if title_paragraph:
-            contextual_hints_section.append(
-                f"{hint_idx}. First title in full text: {title_paragraph.unicode}"
-            )
-            hint_idx += 1
-
-        if local_title_paragraph:
-            is_different_from_global = True
-            if title_paragraph:
-                if local_title_paragraph.debug_id == title_paragraph.debug_id:
-                    is_different_from_global = False
-
-            if is_different_from_global:
-                contextual_hints_section.append(
-                    f"{hint_idx}. Most similar section title: {local_title_paragraph.unicode}"
-                )
-                hint_idx += 1
-
-        # --- ADD GLOSSARY HINTS ---
-        batch_text_for_glossary_matching = "\n".join(
-            item.get("input", "") for item in json_format_input
-        )
-
-        active_glossary_markdown_blocks: list[str] = []
-        # Use cached glossaries
-        if self._cached_glossaries:
-            for glossary in self._cached_glossaries:
-                # Get active entries for the current batch_text_for_glossary_matching
-                active_entries = glossary.get_active_entries_for_text(
-                    batch_text_for_glossary_matching
-                )
-
-                if active_entries:
-                    current_glossary_md_entries: list[str] = []
-                    for original_source, target_text in sorted(active_entries):
-                        current_glossary_md_entries.append(
-                            f"| {original_source} | {target_text} |"
-                        )
-
-                    if current_glossary_md_entries:
-                        glossary_table_md = (
-                            f"### Glossary: {glossary.name}\n\n"
-                            "| Source Term | Target Term |\n"
-                            "|-------------|-------------|\n"
-                            + "\n".join(current_glossary_md_entries)
-                        )
-                        active_glossary_markdown_blocks.append(glossary_table_md)
-
-        if contextual_hints_section or active_glossary_markdown_blocks:
-            llm_prompt_parts.append("\n## Contextual Hints for Better Translation")
-            llm_prompt_parts.extend(contextual_hints_section)
-
-            if active_glossary_markdown_blocks:
-                llm_prompt_parts.append(
-                    f"{hint_idx}. You MUST strictly adhere to the following glossaries. auto_extracted_glossary has a lower priority; please give preference to other glossaries. If a source term from a table appears in the text, use the corresponding target term in your translation:"
-                )
-                # hint_idx += 1 # No need to increment if tables are part of this point
-                for md_block in active_glossary_markdown_blocks:
-                    llm_prompt_parts.append(f"\n{md_block}\n")
-
         # 3. ## Strict Rules:
         llm_prompt_parts.append("\n## Strict Rules:")
         llm_prompt_parts.append(

@@ -771,6 +708,69 @@ def translate_paragraph(
         llm_prompt_parts.append("```")
         llm_prompt_parts.append("</example>")

+        # 2. ##Contextual Hints for Better Translation
+        contextual_hints_section: list[str] = []
+        hint_idx = 1
+        if title_paragraph:
+            contextual_hints_section.append(
+                f"{hint_idx}. First title in full text: {title_paragraph.unicode}"
+            )
+            hint_idx += 1
+
+        if local_title_paragraph:
+            is_different_from_global = True
+            if title_paragraph:
+                if local_title_paragraph.debug_id == title_paragraph.debug_id:
+                    is_different_from_global = False
+
+            if is_different_from_global:
+                contextual_hints_section.append(
+                    f"{hint_idx}. The most recent title is: {local_title_paragraph.unicode}"
+                )
+                hint_idx += 1
+
+        # --- ADD GLOSSARY HINTS ---
+        batch_text_for_glossary_matching = "\n".join(
+            item.get("input", "") for item in json_format_input
+        )
+
+        active_glossary_markdown_blocks: list[str] = []
+        # Use cached glossaries
+        if self._cached_glossaries:
+            for glossary in self._cached_glossaries:
+                # Get active entries for the current batch_text_for_glossary_matching
+                active_entries = glossary.get_active_entries_for_text(
+                    batch_text_for_glossary_matching
+                )
+
+                if active_entries:
+                    current_glossary_md_entries: list[str] = []
+                    for original_source, target_text in sorted(active_entries):
+                        current_glossary_md_entries.append(
+                            f"| {original_source} | {target_text} |"
+                        )
+
+                    if current_glossary_md_entries:
+                        glossary_table_md = (
+                            f"### Glossary: {glossary.name}\n\n"
+                            "| Source Term | Target Term |\n"
+                            "|-------------|-------------|\n"
+                            + "\n".join(current_glossary_md_entries)
+                        )
+                        active_glossary_markdown_blocks.append(glossary_table_md)
+
+        if contextual_hints_section or active_glossary_markdown_blocks:
+            llm_prompt_parts.append("\n## Contextual Hints for Better Translation")
+            llm_prompt_parts.extend(contextual_hints_section)
+
+            if active_glossary_markdown_blocks:
+                llm_prompt_parts.append(
+                    f"{hint_idx}. You MUST strictly adhere to the following glossaries. please give preference to other glossaries. If a source term from a table appears in the text, use the corresponding target term in your translation:"
+                )
+                # hint_idx += 1 # No need to increment if tables are part of this point
+                for md_block in active_glossary_markdown_blocks:
+                    llm_prompt_parts.append(f"\n{md_block}\n")
+
         # 6. ## Here is the input:
         llm_prompt_parts.append("\n## Here is the input:")

babeldoc/format/pdf/translation_config.py

Lines changed: 6 additions & 0 deletions
@@ -321,6 +321,7 @@ def __init__(
             "total_tokens": 0,
             "prompt_tokens": 0,
             "completion_tokens": 0,
+            "cache_hit_prompt_tokens": 0,
         }

         if self.ocr_workaround:

@@ -437,6 +438,7 @@ def record_term_extraction_usage(
         total_tokens: int,
         prompt_tokens: int,
        completion_tokens: int,
+        cache_hit_prompt_tokens: int,
     ) -> None:
         """Accumulate token usage for automatic term extraction."""
         if total_tokens > 0:

@@ -445,6 +447,10 @@ def record_term_extraction_usage(
             self.term_extraction_token_usage["prompt_tokens"] += prompt_tokens
         if completion_tokens > 0:
             self.term_extraction_token_usage["completion_tokens"] += completion_tokens
+        if cache_hit_prompt_tokens > 0:
+            self.term_extraction_token_usage["cache_hit_prompt_tokens"] += (
+                cache_hit_prompt_tokens
+            )


 class TranslateResult:

babeldoc/main.py

Lines changed: 12 additions & 3 deletions
@@ -26,7 +26,7 @@
 from babeldoc.translator.translator import set_translate_rate_limiter

 logger = logging.getLogger(__name__)
-__version__ = "0.5.14"
+__version__ = "0.5.15"


 def create_parser():

@@ -636,6 +636,7 @@ async def main():
     total_term_extraction_total_tokens = 0
     total_term_extraction_prompt_tokens = 0
     total_term_extraction_completion_tokens = 0
+    total_term_extraction_cache_hit_prompt_tokens = 0

     for file in pending_files:
         # Clean the file path, stripping the quotes at both ends

@@ -721,21 +722,29 @@ def nop(_x):
             total_term_extraction_total_tokens += usage["total_tokens"]
             total_term_extraction_prompt_tokens += usage["prompt_tokens"]
             total_term_extraction_completion_tokens += usage["completion_tokens"]
+            total_term_extraction_cache_hit_prompt_tokens += usage[
+                "cache_hit_prompt_tokens"
+            ]
         logger.info(f"Total tokens: {translator.token_count.value}")
         logger.info(f"Prompt tokens: {translator.prompt_token_count.value}")
         logger.info(f"Completion tokens: {translator.completion_token_count.value}")
         logger.info(
-            "Term extraction tokens: total=%s prompt=%s completion=%s",
+            f"Cache hit prompt tokens: {translator.cache_hit_prompt_token_count.value}"
+        )
+        logger.info(
+            "Term extraction tokens: total=%s prompt=%s completion=%s cache_hit_prompt=%s",
             total_term_extraction_total_tokens,
             total_term_extraction_prompt_tokens,
             total_term_extraction_completion_tokens,
+            total_term_extraction_cache_hit_prompt_tokens,
         )
         if term_extraction_translator is not translator:
             logger.info(
-                "Term extraction translator raw tokens: total=%s prompt=%s completion=%s",
+                "Term extraction translator raw tokens: total=%s prompt=%s completion=%s cache_hit_prompt=%s",
                 term_extraction_translator.token_count.value,
                 term_extraction_translator.prompt_token_count.value,
                 term_extraction_translator.completion_token_count.value,
+                term_extraction_translator.cache_hit_prompt_token_count.value,
             )
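
With the two new log lines, the cache's effect is measurable per run; the hit rate is one division away. A sketch with invented values:

```python
# Values as they might appear in the two log lines above (invented):
prompt_tokens = 52_340            # "Prompt tokens: ..."
cache_hit_prompt_tokens = 31_776  # "Cache hit prompt tokens: ..."
print(f"prefix cache hit rate: {cache_hit_prompt_tokens / prompt_tokens:.1%}")
# prefix cache hit rate: 60.7%
```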

babeldoc/translator/translator.py

Lines changed: 5 additions & 0 deletions
@@ -249,6 +249,7 @@ def __init__(
         self.token_count = AtomicInteger()
         self.prompt_token_count = AtomicInteger()
         self.completion_token_count = AtomicInteger()
+        self.cache_hit_prompt_token_count = AtomicInteger()

     @retry(
         retry=retry_if_exception_type(openai.RateLimitError),

@@ -338,6 +339,10 @@ def update_token_count(self, response):
                 self.prompt_token_count.inc(response.usage.prompt_tokens)
             if response.usage and response.usage.completion_tokens:
                 self.completion_token_count.inc(response.usage.completion_tokens)
+            if response.usage and (
+                hit_count := getattr(response.usage, "prompt_cache_hit_tokens", 0)
+            ):
+                self.cache_hit_prompt_token_count.inc(hit_count)
         except Exception as e:
             logger.exception("Error updating token count")
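
prompt_cache_hit_tokens is the usage field DeepSeek's API uses to report cache reuse; other OpenAI-compatible backends may omit it entirely (OpenAI itself nests cached counts under prompt_tokens_details.cached_tokens instead), which is why the diff reads it with a defensive getattr. A stand-in demonstration of the guarded read, with no real client involved:

```python
from types import SimpleNamespace

# Fake responses: one DeepSeek-style with the field, one without it.
with_cache = SimpleNamespace(
    usage=SimpleNamespace(
        prompt_tokens=1200,
        completion_tokens=80,
        prompt_cache_hit_tokens=1024,  # tokens reused from the prefix cache
    )
)
without_cache = SimpleNamespace(
    usage=SimpleNamespace(prompt_tokens=1200, completion_tokens=80)
)

for response in (with_cache, without_cache):
    if response.usage and (
        hit_count := getattr(response.usage, "prompt_cache_hit_tokens", 0)
    ):
        print(f"cache hit: {hit_count} / {response.usage.prompt_tokens}")
# Only the first response prints; the second falls back to the 0 default.
```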

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "BabelDOC"
-version = "0.5.14"
+version = "0.5.15"
 description = "Yet Another Document Translator"
 license = "AGPL-3.0"
 readme = "README.md"

@@ -162,7 +162,7 @@ pythonpath = [".", "src"]
 testpaths = ["tests"]

 [bumpver]
-current_version = "0.5.14"
+current_version = "0.5.15"
 version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"

 [bumpver.file_patterns]
