From e05f105063b4d39a58563247b23a8ce2df8d3e0f Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Thu, 28 Nov 2024 14:10:27 +0800
Subject: [PATCH 1/6] =?UTF-8?q?doc2x=E8=AF=B7=E6=B1=82=E5=87=BD=E6=95=B0?=
 =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E6=B8=85=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../pdf_fns/parse_pdf_via_doc2x.py            | 194 ++++++++++--------
 1 file changed, 106 insertions(+), 88 deletions(-)

diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index 97c62fbf..45d5ca1d 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -8,118 +8,105 @@ from loguru import logger
 import os
 import time
 
-def refresh_key(doc2x_api_key):
-    import requests, json
-    url = "https://api.doc2x.noedgeai.com/api/token/refresh"
-    res = requests.post(
-        url,
-        headers={"Authorization": "Bearer " + doc2x_api_key}
-    )
-    res_json = []
-    if res.status_code == 200:
-        decoded = res.content.decode("utf-8")
-        res_json = json.loads(decoded)
-        doc2x_api_key = res_json['data']['token']
-    else:
-        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
-    return doc2x_api_key
 
+def 状态检查(code: str, meg: str, trace_id: str):
+    trace_id = trace_id or "Failed to get trace_id"
+    if code in ["parse_page_limit_exceeded", "parse_concurrency_limit"]:
+        raise RuntimeError(
+            f"Reached the limit of Doc2x:\nTrace ID: {trace_id}\n{code} - {meg}"
+        )
+    if code not in ["ok", "success"]:
+        raise RuntimeError(
+            f"Doc2x return an error:\nTrace ID: {trace_id}\n{code} - {meg}"
+        )
 
 
 def 解析PDF_DOC2X_转Latex(pdf_file_path):
-    zip_file_path, unzipped_folder = 解析PDF_DOC2X(pdf_file_path, format='tex')
+    zip_file_path, unzipped_folder = 解析PDF_DOC2X(pdf_file_path, format="tex")
     return unzipped_folder
 
 
-def 解析PDF_DOC2X(pdf_file_path, format='tex'):
+def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     """
-        format: 'tex', 'md', 'docx'
+    format: 'tex', 'md', 'docx'
     """
     import requests, json, os
-    DOC2X_API_KEY = get_conf('DOC2X_API_KEY')
+
+    DOC2X_API_KEY = get_conf("DOC2X_API_KEY")
     latex_dir = get_log_folder(plugin_name="pdf_ocr_latex")
     markdown_dir = get_log_folder(plugin_name="pdf_ocr")
     doc2x_api_key = DOC2X_API_KEY
 
-
     # < ------ 第1步：上传 ------ >
     logger.info("Doc2x 第1步：上传")
-    with open(pdf_file_path, 'rb') as file:
+    with open(pdf_file_path, "rb") as file:
         res = requests.post(
             "https://v2.doc2x.noedgeai.com/api/v2/parse/pdf",
             headers={"Authorization": "Bearer " + doc2x_api_key},
-            data=file
+            data=file,
         )
     # res_json = []
     if res.status_code == 200:
         res_json = res.json()
     else:
         raise RuntimeError(f"Doc2x return an error: {res.json()}")
-    uuid = res_json['data']['uid']
+    uuid = res_json["data"]["uid"]
 
     # < ------ 第2步：轮询等待 ------ >
     logger.info("Doc2x 第2步：轮询等待")
-    params = {'uid': uuid}
+    params = {"uid": uuid}
     while True:
         res = requests.get(
-            'https://v2.doc2x.noedgeai.com/api/v2/parse/status',
+            "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
             headers={"Authorization": "Bearer " + doc2x_api_key},
-            params=params
+            params=params,
         )
         res_json = res.json()
-        if res_json['data']['status'] == "success":
+        if res_json["data"]["status"] == "success":
             break
-        elif res_json['data']['status'] == "processing":
+        elif res_json["data"]["status"] == "processing":
             time.sleep(3)
             logger.info(f"Doc2x is processing at {res_json['data']['progress']}%")
-        elif res_json['data']['status'] == "failed":
+        elif res_json["data"]["status"] == "failed":
             raise RuntimeError(f"Doc2x return an error: {res_json}")
 
-
     # < ------ 第3步：提交转化 ------ >
     logger.info("Doc2x 第3步：提交转化")
-    data = {
-        "uid": uuid,
-        "to": format,
-        "formula_mode": "dollar",
-        "filename": "output"
-    }
+    data = {"uid": uuid, "to": format, "formula_mode": "dollar", "filename": "output"}
     res = requests.post(
-        'https://v2.doc2x.noedgeai.com/api/v2/convert/parse',
+        "https://v2.doc2x.noedgeai.com/api/v2/convert/parse",
         headers={"Authorization": "Bearer " + doc2x_api_key},
-        json=data
+        json=data,
     )
     if res.status_code == 200:
         res_json = res.json()
     else:
         raise RuntimeError(f"Doc2x return an error: {res.json()}")
 
-
     # < ------ 第4步：等待结果 ------ >
     logger.info("Doc2x 第4步：等待结果")
-    params = {'uid': uuid}
+    params = {"uid": uuid}
     while True:
         res = requests.get(
-            'https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result',
+            "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
             headers={"Authorization": "Bearer " + doc2x_api_key},
-            params=params
+            params=params,
         )
         res_json = res.json()
-        if res_json['data']['status'] == "success":
+        if res_json["data"]["status"] == "success":
             break
-        elif res_json['data']['status'] == "processing":
+        elif res_json["data"]["status"] == "processing":
             time.sleep(3)
             logger.info(f"Doc2x still processing")
-        elif res_json['data']['status'] == "failed":
+        elif res_json["data"]["status"] == "failed":
             raise RuntimeError(f"Doc2x return an error: {res_json}")
 
-
     # < ------ 第5步：最后的处理 ------ >
     logger.info("Doc2x 第5步：最后的处理")
 
-    if format=='tex':
+    if format == "tex":
         target_path = latex_dir
-    if format=='md':
+    if format == "md":
         target_path = markdown_dir
     os.makedirs(target_path, exist_ok=True)
 
@@ -127,12 +114,13 @@ def 解析PDF_DOC2X(pdf_file_path, format='tex'):
     # < ------ 下载 ------ >
     for attempt in range(max_attempt):
         try:
-            result_url = res_json['data']['url']
+            result_url = res_json["data"]["url"]
             res = requests.get(result_url)
-            zip_path = os.path.join(target_path, gen_time_str() + '.zip')
+            zip_path = os.path.join(target_path, gen_time_str() + ".zip")
             unzip_path = os.path.join(target_path, gen_time_str())
             if res.status_code == 200:
-                with open(zip_path, "wb") as f: f.write(res.content)
+                with open(zip_path, "wb") as f:
+                    f.write(res.content)
             else:
                 raise RuntimeError(f"Doc2x return an error: {res.json()}")
         except Exception as e:
@@ -145,22 +133,32 @@ def 解析PDF_DOC2X(pdf_file_path, format='tex'):
 
     # < ------ 解压 ------ >
     import zipfile
-    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+
+    with zipfile.ZipFile(zip_path, "r") as zip_ref:
         zip_ref.extractall(unzip_path)
     return zip_path, unzip_path
 
 
-def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request):
-
+def 解析PDF_DOC2X_单文件(
+    fp,
+    project_folder,
+    llm_kwargs,
+    plugin_kwargs,
+    chatbot,
+    history,
+    system_prompt,
+    DOC2X_API_KEY,
+    user_request,
+):
     def pdf2markdown(filepath):
         chatbot.append((None, f"Doc2x 解析中"))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
 
-        md_zip_path, unzipped_folder = 解析PDF_DOC2X(filepath, format='md')
+        md_zip_path, unzipped_folder = 解析PDF_DOC2X(filepath, format="md")
 
         promote_file_to_downloadzone(md_zip_path, chatbot=chatbot)
         chatbot.append((None, f"完成解析 {md_zip_path} ..."))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
         return md_zip_path
 
     def deliver_to_markdown_plugin(md_zip_path, user_request):
@@ -174,77 +172,97 @@ def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, cha
         os.makedirs(target_path_base, exist_ok=True)
         shutil.copyfile(md_zip_path, this_file_path)
         ex_folder = this_file_path + ".extract"
-        extract_archive(
-            file_path=this_file_path, dest_dir=ex_folder
-        )
+        extract_archive(file_path=this_file_path, dest_dir=ex_folder)
 
         # edit markdown files
-        success, file_manifest, project_folder = get_files_from_everything(ex_folder, type='.md')
+        success, file_manifest, project_folder = get_files_from_everything(
+            ex_folder, type=".md"
+        )
         for generated_fp in file_manifest:
             # 修正一些公式问题
-            with open(generated_fp, 'r', encoding='utf8') as f:
+            with open(generated_fp, "r", encoding="utf8") as f:
                 content = f.read()
             # 将公式中的\[ \]替换成$$
-            content = content.replace(r'\[', r'$$').replace(r'\]', r'$$')
+            content = content.replace(r"\[", r"$$").replace(r"\]", r"$$")
             # 将公式中的\( \)替换成$
-            content = content.replace(r'\(', r'$').replace(r'\)', r'$')
-            content = content.replace('```markdown', '\n').replace('```', '\n')
-            with open(generated_fp, 'w', encoding='utf8') as f:
+            content = content.replace(r"\(", r"$").replace(r"\)", r"$")
+            content = content.replace("```markdown", "\n").replace("```", "\n")
+            with open(generated_fp, "w", encoding="utf8") as f:
                 f.write(content)
             promote_file_to_downloadzone(generated_fp, chatbot=chatbot)
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+            yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
 
             # 生成在线预览html
-            file_name = '在线预览翻译（原文）' + gen_time_str() + '.html'
+            file_name = "在线预览翻译（原文）" + gen_time_str() + ".html"
             preview_fp = os.path.join(ex_folder, file_name)
-            from shared_utils.advanced_markdown_format import markdown_convertion_for_file
+            from shared_utils.advanced_markdown_format import (
+                markdown_convertion_for_file,
+            )
+
             with open(generated_fp, "r", encoding="utf-8") as f:
                 md = f.read()
             #     # Markdown中使用不标准的表格，需要在表格前加上一个emoji，以便公式渲染
             #     md = re.sub(r'^<table>', r'.<table>', md, flags=re.MULTILINE)
             html = markdown_convertion_for_file(md)
-            with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
+            with open(preview_fp, "w", encoding="utf-8") as f:
+                f.write(html)
             chatbot.append([None, f"生成在线预览：{generate_file_link([preview_fp])}"])
             promote_file_to_downloadzone(preview_fp, chatbot=chatbot)
 
-
-
         chatbot.append((None, f"调用Markdown插件 {ex_folder} ..."))
-        plugin_kwargs['markdown_expected_output_dir'] = ex_folder
+        plugin_kwargs["markdown_expected_output_dir"] = ex_folder
 
-        translated_f_name = 'translated_markdown.md'
-        generated_fp = plugin_kwargs['markdown_expected_output_path'] = os.path.join(ex_folder, translated_f_name)
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        yield from Markdown英译中(ex_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
+        translated_f_name = "translated_markdown.md"
+        generated_fp = plugin_kwargs["markdown_expected_output_path"] = os.path.join(
+            ex_folder, translated_f_name
+        )
+        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
+        yield from Markdown英译中(
+            ex_folder,
+            llm_kwargs,
+            plugin_kwargs,
+            chatbot,
+            history,
+            system_prompt,
+            user_request,
+        )
         if os.path.exists(generated_fp):
             # 修正一些公式问题
-            with open(generated_fp, 'r', encoding='utf8') as f: content = f.read()
-            content = content.replace('```markdown', '\n').replace('```', '\n')
+            with open(generated_fp, "r", encoding="utf8") as f:
+                content = f.read()
+            content = content.replace("```markdown", "\n").replace("```", "\n")
             # Markdown中使用不标准的表格，需要在表格前加上一个emoji，以便公式渲染
             # content = re.sub(r'^<table>', r'.<table>', content, flags=re.MULTILINE)
-            with open(generated_fp, 'w', encoding='utf8') as f: f.write(content)
+            with open(generated_fp, "w", encoding="utf8") as f:
+                f.write(content)
             # 生成在线预览html
-            file_name = '在线预览翻译' + gen_time_str() + '.html'
+            file_name = "在线预览翻译" + gen_time_str() + ".html"
             preview_fp = os.path.join(ex_folder, file_name)
-            from shared_utils.advanced_markdown_format import markdown_convertion_for_file
+            from shared_utils.advanced_markdown_format import (
+                markdown_convertion_for_file,
+            )
+
             with open(generated_fp, "r", encoding="utf-8") as f:
                 md = f.read()
             html = markdown_convertion_for_file(md)
-            with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
+            with open(preview_fp, "w", encoding="utf-8") as f:
+                f.write(html)
             promote_file_to_downloadzone(preview_fp, chatbot=chatbot)
             # 生成包含图片的压缩包
             dest_folder = get_log_folder(chatbot.get_user())
-            zip_name = '翻译后的带图文档.zip'
-            zip_folder(source_folder=ex_folder, dest_folder=dest_folder, zip_name=zip_name)
+            zip_name = "翻译后的带图文档.zip"
+            zip_folder(
+                source_folder=ex_folder, dest_folder=dest_folder, zip_name=zip_name
+            )
             zip_fp = os.path.join(dest_folder, zip_name)
             promote_file_to_downloadzone(zip_fp, chatbot=chatbot)
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+            yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
+
     md_zip_path = yield from pdf2markdown(fp)
     yield from deliver_to_markdown_plugin(md_zip_path, user_request)
 
+
 def 解析PDF_基于DOC2X(file_manifest, *args):
     for index, fp in enumerate(file_manifest):
         yield from 解析PDF_DOC2X_单文件(fp, *args)
     return
-
-

From c49e89608295c123b807f89a457baa6dd6ff1bd5 Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Thu, 28 Nov 2024 20:58:26 +0800
Subject: [PATCH 2/6] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=B8=AD=E9=97=B4?=
 =?UTF-8?q?=E9=83=A8=E5=88=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../pdf_fns/parse_pdf_via_doc2x.py            | 84 +++++++++++--------
 1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index 45d5ca1d..3a37a530 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -1,24 +1,40 @@
 from toolbox import get_log_folder, gen_time_str, get_conf
 from toolbox import update_ui, promote_file_to_downloadzone
-from toolbox import promote_file_to_downloadzone, extract_archive
+from toolbox import extract_archive
 from toolbox import generate_file_link, zip_folder
 from crazy_functions.crazy_utils import get_files_from_everything
 from shared_utils.colorful import *
 from loguru import logger
 import os
+import requests
 import time
+import json
 
 
-def 状态检查(code: str, meg: str, trace_id: str):
-    trace_id = trace_id or "Failed to get trace_id"
+def 状态检查(response, uid=""):
+    """
+    Check the status of Doc2x API response
+    Args:
+        response_data: Response object from Doc2x API
+    """
+    response_json = response.json()
+    response_data = response_json.get("data", {})
+    code = response_data.get("code", "Unknown")
+    meg = response_data.get("message", "")
+    trace_id = response.headers.get("trace-id", "Failed to get trace-id")
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Doc2x return an error:\nTrace ID: {trace_id} {uid}\n{response.status_code} - {response_json}"
+        )
     if code in ["parse_page_limit_exceeded", "parse_concurrency_limit"]:
         raise RuntimeError(
-            f"Reached the limit of Doc2x:\nTrace ID: {trace_id}\n{code} - {meg}"
+            f"Reached the limit of Doc2x:\nTrace ID: {trace_id} {uid}\n{code} - {meg}"
         )
     if code not in ["ok", "success"]:
         raise RuntimeError(
-            f"Doc2x return an error:\nTrace ID: {trace_id}\n{code} - {meg}"
+            f"Doc2x return an error:\nTrace ID: {trace_id} {uid}\n{code} - {meg}"
         )
+    return response_data
 
 
 def 解析PDF_DOC2X_转Latex(pdf_file_path):
@@ -30,30 +46,29 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     """
     format: 'tex', 'md', 'docx'
     """
-    import requests, json, os
 
     DOC2X_API_KEY = get_conf("DOC2X_API_KEY")
     latex_dir = get_log_folder(plugin_name="pdf_ocr_latex")
     markdown_dir = get_log_folder(plugin_name="pdf_ocr")
     doc2x_api_key = DOC2X_API_KEY
 
-    # < ------ 第1步：上传 ------ >
-    logger.info("Doc2x 第1步：上传")
+    # < ------ 第1步：预上传获取URL，然后上传文件 ------ >
+    logger.info("Doc2x 上传文件：预上传获取URL")
+    res = requests.post(
+        "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload",
+        headers={"Authorization": "Bearer " + doc2x_api_key},
+    )
+    res_data = 状态检查(res)
+    upload_url = res_data["url"]
+    uuid = res_data["uid"]
+
+    logger.info("Doc2x 上传文件：上传文件")
     with open(pdf_file_path, "rb") as file:
-        res = requests.post(
-            "https://v2.doc2x.noedgeai.com/api/v2/parse/pdf",
-            headers={"Authorization": "Bearer " + doc2x_api_key},
-            data=file,
-        )
-    # res_json = []
-    if res.status_code == 200:
-        res_json = res.json()
-    else:
-        raise RuntimeError(f"Doc2x return an error: {res.json()}")
-    uuid = res_json["data"]["uid"]
+        res = requests.put(upload_url, data=file)
+    res.raise_for_status()
 
     # < ------ 第2步：轮询等待 ------ >
-    logger.info("Doc2x 第2步：轮询等待")
+    logger.info("Doc2x 处理文件中：轮询等待")
     params = {"uid": uuid}
     while True:
         res = requests.get(
@@ -61,14 +76,14 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
         )
-        res_json = res.json()
-        if res_json["data"]["status"] == "success":
+        res_data = 状态检查(res)
+        if res_data["status"] == "success":
             break
-        elif res_json["data"]["status"] == "processing":
-            time.sleep(3)
-            logger.info(f"Doc2x is processing at {res_json['data']['progress']}%")
-        elif res_json["data"]["status"] == "failed":
-            raise RuntimeError(f"Doc2x return an error: {res_json}")
+        elif res_data["status"] == "processing":
+            time.sleep(5)
+            logger.info(f"Doc2x is processing at {res_data['progress']}%")
+        else:
+            raise RuntimeError(f"Doc2x return an error: {res_data}")
 
     # < ------ 第3步：提交转化 ------ >
     logger.info("Doc2x 第3步：提交转化")
@@ -78,10 +93,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         headers={"Authorization": "Bearer " + doc2x_api_key},
         json=data,
     )
-    if res.status_code == 200:
-        res_json = res.json()
-    else:
-        raise RuntimeError(f"Doc2x return an error: {res.json()}")
+    状态检查(res, uid=f"uid: {uuid}")
 
     # < ------ 第4步：等待结果 ------ >
     logger.info("Doc2x 第4步：等待结果")
@@ -92,14 +104,12 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
         )
-        res_json = res.json()
-        if res_json["data"]["status"] == "success":
+        res_data = 状态检查(res, uid=f"uid: {uuid}")
+        if res_data["status"] == "success":
             break
-        elif res_json["data"]["status"] == "processing":
+        elif res_data["status"] == "processing":
             time.sleep(3)
-            logger.info(f"Doc2x still processing")
-        elif res_json["data"]["status"] == "failed":
-            raise RuntimeError(f"Doc2x return an error: {res_json}")
+            logger.info("Doc2x still processing to convert file")
 
     # < ------ 第5步：最后的处理 ------ >
     logger.info("Doc2x 第5步：最后的处理")

From 05a5add8da2ae76d4063ba87296984ae0ce69df0 Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Fri, 29 Nov 2024 15:02:58 +0800
Subject: [PATCH 3/6] =?UTF-8?q?=20=20=E6=B7=BB=E5=8A=A0doc2x=E8=B6=85?=
 =?UTF-8?q?=E6=97=B6=E8=AE=BE=E7=BD=AE=E5=B9=B6=E6=B7=BB=E5=8A=A0=E5=AF=B9?=
 =?UTF-8?q?xelatex=E7=BC=96=E8=AF=91=E7=9A=84=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_fns/latex_actions.py    | 37 ++++++++++++++-----
 .../pdf_fns/parse_pdf_via_doc2x.py            | 16 ++++++--
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index df5135bb..ec1ec3e8 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -342,7 +342,6 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
         logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         return False, -1, [-1]
 
-
 def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
     import os, time
     n_fix = 1
@@ -351,6 +350,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
     chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}，如果程序停顿5分钟以上，请直接去该路径下取回翻译结果，或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
     chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
     yield from update_ui_lastest_msg('编译已经开始...', chatbot, history)   # 刷新Gradio前端界面
+    # 检查是否需要使用xelatex
+    def check_if_need_xelatex(tex_path):
+        try:
+            with open(tex_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read(5000)
+                # 检查是否有使用xelatex的宏包
+                return any(pkg in content for pkg in ['fontspec', 'xeCJK', 'xetex', 'unicode-math', 'xltxtra', 'xunicode'])
+        except Exception:
+            return False
+
+    # 根据编译器类型返回编译命令
+    def get_compile_command(compiler, filename):
+        return f'{compiler} -interaction=batchmode -file-line-error {filename}.tex'
+
+    # 确定使用的编译器
+    compiler = 'pdflatex'
+    if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
+        compiler = 'xelatex'
 
     while True:
         import os
@@ -361,10 +378,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 
         # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
-        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+        ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
 
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history)   # 刷新Gradio前端界面
-        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
+        ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
 
         if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
             # 只有第二步成功，才能继续下面的步骤
@@ -375,10 +392,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux', work_folder_modified)
 
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history)  # 刷新Gradio前端界面
-            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
-            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
-            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
-            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
+            ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
+            ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
+            ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
+            ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
 
             if mode!='translate_zh':
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@@ -386,10 +403,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
 
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
-                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
                 ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux', work_folder)
-                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
-                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
+                ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
 
         # <---------- 检查结果 ----------->
         results_ = ""
diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index 3a37a530..b770bdab 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -70,7 +70,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     # < ------ 第2步：轮询等待 ------ >
     logger.info("Doc2x 处理文件中：轮询等待")
     params = {"uid": uuid}
-    while True:
+    max_attempts = 60
+    attempt = 0
+    while attempt < max_attempts:
         res = requests.get(
             "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
             headers={"Authorization": "Bearer " + doc2x_api_key},
@@ -82,8 +84,11 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         elif res_data["status"] == "processing":
             time.sleep(5)
             logger.info(f"Doc2x is processing at {res_data['progress']}%")
+            attempt += 1
         else:
             raise RuntimeError(f"Doc2x return an error: {res_data}")
+    if attempt >= max_attempts:
+        raise RuntimeError("Doc2x processing timeout after maximum attempts")
 
     # < ------ 第3步：提交转化 ------ >
     logger.info("Doc2x 第3步：提交转化")
@@ -98,7 +103,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     # < ------ 第4步：等待结果 ------ >
     logger.info("Doc2x 第4步：等待结果")
     params = {"uid": uuid}
-    while True:
+    max_attempts = 36
+    attempt = 0
+    while attempt < max_attempts:
         res = requests.get(
             "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
             headers={"Authorization": "Bearer " + doc2x_api_key},
@@ -110,6 +117,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         elif res_data["status"] == "processing":
             time.sleep(3)
             logger.info("Doc2x still processing to convert file")
+            attempt += 1
+    if attempt >= max_attempts:
+        raise RuntimeError("Doc2x conversion timeout after maximum attempts")
 
     # < ------ 第5步：最后的处理 ------ >
     logger.info("Doc2x 第5步：最后的处理")
@@ -124,7 +134,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     # < ------ 下载 ------ >
     for attempt in range(max_attempt):
         try:
-            result_url = res_json["data"]["url"]
+            result_url = res_data["url"]
             res = requests.get(result_url)
             zip_path = os.path.join(target_path, gen_time_str() + ".zip")
             unzip_path = os.path.join(target_path, gen_time_str())

From 3a03e3f3d3ca288cf6318d93030a92038fa639d3 Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Fri, 29 Nov 2024 15:42:10 +0800
Subject: [PATCH 4/6] =?UTF-8?q?Bug=E4=BF=AE=E5=A4=8D=E4=BB=A5=E5=8F=8A?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9xelatex=E5=AE=89=E8=A3=85=E7=9A=84?=
 =?UTF-8?q?=E6=A3=80=E6=B5=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/PDF_Translate.py               |  2 +-
 crazy_functions/latex_fns/latex_actions.py     |  9 ++++++++-
 crazy_functions/pdf_fns/parse_pdf_via_doc2x.py | 16 ++++++++++------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/crazy_functions/PDF_Translate.py b/crazy_functions/PDF_Translate.py
index 0f93c821..a4d10837 100644
--- a/crazy_functions/PDF_Translate.py
+++ b/crazy_functions/PDF_Translate.py
@@ -47,7 +47,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
                 yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
                 return
             except:
-                chatbot.append([None, f"DOC2X服务不可用，现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"])
+                chatbot.append([None, f"DOC2X服务不可用，请检查报错详细。{trimmed_format_exc_markdown()}"])
                 yield from update_ui(chatbot=chatbot, history=history)
 
     if method == "GROBID":
diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index ec1ec3e8..a4486e95 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -367,7 +367,14 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
     # 确定使用的编译器
     compiler = 'pdflatex'
     if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
-        compiler = 'xelatex'
+        logger.info("检测到宏包需要xelatex编译，切换至xelatex编译")
+        # Check if xelatex is installed
+        try:
+            import subprocess
+            subprocess.run(['xelatex', '--version'], capture_output=True, check=True)
+            compiler = 'xelatex'
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            raise RuntimeError("检测到需要使用xelatex编译，但系统中未安装xelatex。请先安装texlive或其他提供xelatex的LaTeX发行版。")
 
     while True:
         import os
diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index b770bdab..7d251066 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -19,8 +19,8 @@ def 状态检查(response, uid=""):
     """
     response_json = response.json()
     response_data = response_json.get("data", {})
-    code = response_data.get("code", "Unknown")
-    meg = response_data.get("message", "")
+    code = response_json.get("code", "Unknown")
+    meg = response_data.get("message", response_json)
     trace_id = response.headers.get("trace-id", "Failed to get trace-id")
     if response.status_code != 200:
         raise RuntimeError(
@@ -57,6 +57,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     res = requests.post(
         "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload",
         headers={"Authorization": "Bearer " + doc2x_api_key},
+        timeout=15,
     )
     res_data = 状态检查(res)
     upload_url = res_data["url"]
@@ -64,7 +65,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
 
     logger.info("Doc2x 上传文件：上传文件")
     with open(pdf_file_path, "rb") as file:
-        res = requests.put(upload_url, data=file)
+        res = requests.put(upload_url, data=file, timeout=60)
     res.raise_for_status()
 
     # < ------ 第2步：轮询等待 ------ >
@@ -77,6 +78,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
+            timeout=15,
         )
         res_data = 状态检查(res)
         if res_data["status"] == "success":
@@ -97,6 +99,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         "https://v2.doc2x.noedgeai.com/api/v2/convert/parse",
         headers={"Authorization": "Bearer " + doc2x_api_key},
         json=data,
+        timeout=15,
     )
     状态检查(res, uid=f"uid: {uuid}")
 
@@ -110,6 +113,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
+            timeout=15,
         )
         res_data = 状态检查(res, uid=f"uid: {uuid}")
         if res_data["status"] == "success":
@@ -122,7 +126,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         raise RuntimeError("Doc2x conversion timeout after maximum attempts")
 
     # < ------ 第5步：最后的处理 ------ >
-    logger.info("Doc2x 第5步：最后的处理")
+    logger.info("Doc2x 第5步：下载转换后的文件")
 
     if format == "tex":
         target_path = latex_dir
@@ -135,7 +139,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     for attempt in range(max_attempt):
         try:
             result_url = res_data["url"]
-            res = requests.get(result_url)
+            res = requests.get(result_url, timeout=60)
             zip_path = os.path.join(target_path, gen_time_str() + ".zip")
             unzip_path = os.path.join(target_path, gen_time_str())
             if res.status_code == 200:
@@ -145,7 +149,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
                 raise RuntimeError(f"Doc2x return an error: {res.json()}")
         except Exception as e:
             if attempt < max_attempt - 1:
-                logger.error(f"Failed to download latex file, retrying... {e}")
+                logger.error(f"Failed to download uid = {uuid} file, retrying... {e}")
                 time.sleep(3)
                 continue
             else:

From ef72a00726e8ff85c88089fec9e914de508727e4 Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Fri, 29 Nov 2024 15:54:27 +0800
Subject: [PATCH 5/6] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E5=BC=B1=E7=BD=91?=
 =?UTF-8?q?=E7=8E=AF=E5=A2=83=E4=B8=8B=E7=9A=84=E7=A8=B3=E5=AE=9A=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../pdf_fns/parse_pdf_via_doc2x.py            | 53 ++++++++++++++++---
 1 file changed, 46 insertions(+), 7 deletions(-)

diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index 7d251066..64627d05 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -8,7 +8,42 @@ from loguru import logger
 import os
 import requests
 import time
-import json
+
+
+def retry_request(max_retries=3, delay=3):
+    """
+    Decorator factory that retries the wrapped call whenever it raises.
+    Args:
+        max_retries: Total number of attempts; values below 1 still try once
+        delay: Seconds to sleep between two attempts
+    Re-raises the last exception unchanged once every attempt has failed.
+    """
+    from functools import wraps
+
+    def decorator(func):
+        @wraps(func)  # keep the wrapped function's name and docstring
+        def wrapper(*args, **kwargs):
+            attempts = max(1, max_retries)  # never skip the call and return None
+            for attempt in range(1, attempts + 1):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    if attempt == attempts:
+                        raise  # out of retries: propagate the original error
+                    logger.error(f"Request failed, retrying... ({attempt}/{attempts}) Error: {e}")
+                    time.sleep(delay)
+
+        return wrapper
+
+    return decorator
+
+
+@retry_request()
+def make_request(method, url, **kwargs):
+    """Send an HTTP request, retried on failure; rewinds a file-like ``data`` body."""
+    if hasattr(kwargs.get("data"), "seek"):
+        kwargs["data"].seek(0)  # an earlier failed attempt may have consumed the stream
+    return requests.request(method, url, **kwargs)
 
 
 def 状态检查(response, uid=""):
@@ -54,7 +89,8 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
 
     # < ------ 第1步：预上传获取URL，然后上传文件 ------ >
     logger.info("Doc2x 上传文件：预上传获取URL")
-    res = requests.post(
+    res = make_request(
+        "POST",
         "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload",
         headers={"Authorization": "Bearer " + doc2x_api_key},
         timeout=15,
@@ -65,7 +101,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
 
     logger.info("Doc2x 上传文件：上传文件")
     with open(pdf_file_path, "rb") as file:
-        res = requests.put(upload_url, data=file, timeout=60)
+        res = make_request("PUT", upload_url, data=file, timeout=60)
     res.raise_for_status()
 
     # < ------ 第2步：轮询等待 ------ >
@@ -74,7 +110,8 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     max_attempts = 60
     attempt = 0
     while attempt < max_attempts:
-        res = requests.get(
+        res = make_request(
+            "GET",
             "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
@@ -95,7 +132,8 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     # < ------ 第3步：提交转化 ------ >
     logger.info("Doc2x 第3步：提交转化")
     data = {"uid": uuid, "to": format, "formula_mode": "dollar", "filename": "output"}
-    res = requests.post(
+    res = make_request(
+        "POST",
         "https://v2.doc2x.noedgeai.com/api/v2/convert/parse",
         headers={"Authorization": "Bearer " + doc2x_api_key},
         json=data,
@@ -109,7 +147,8 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     max_attempts = 36
     attempt = 0
     while attempt < max_attempts:
-        res = requests.get(
+        res = make_request(
+            "GET",
             "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
@@ -139,7 +178,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     for attempt in range(max_attempt):
         try:
             result_url = res_data["url"]
-            res = requests.get(result_url, timeout=60)
+            res = make_request("GET", result_url, timeout=60)
             zip_path = os.path.join(target_path, gen_time_str() + ".zip")
             unzip_path = os.path.join(target_path, gen_time_str())
             if res.status_code == 200:

From 86ec3e365496093e9f9f7c6abea4ecae8e013e8a Mon Sep 17 00:00:00 2001
From: Menghuan1918 <Menghuan2003@outlook.com>
Date: Tue, 3 Dec 2024 23:28:54 +0800
Subject: [PATCH 6/6] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=A8=A1=E5=9E=8B?=
 =?UTF-8?q?=E4=B8=AD=5F=E6=97=A0=E6=B3=95=E6=98=BE=E7=A4=BA=E7=9A=84?=
 =?UTF-8?q?=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_fns/latex_actions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index a4486e95..63c6a644 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -300,7 +300,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
     write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
 
     #  <-------- 写出文件 ---------->
-    msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
+    model_name = llm_kwargs['llm_model'].replace('_', '\\_')  # 替换LLM模型名称中的下划线为转义字符
+    msg = f"当前大语言模型: {model_name}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
     final_tex = lps.merge_result(pfg.file_result, mode, msg)
     objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))