From 3a03e3f3d3ca288cf6318d93030a92038fa639d3 Mon Sep 17 00:00:00 2001
From: Menghuan1918 <menghuan2003@outlook.com>
Date: Fri, 29 Nov 2024 15:42:10 +0800
Subject: [PATCH] =?UTF-8?q?Bug=E4=BF=AE=E5=A4=8D=E4=BB=A5=E5=8F=8A?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9xelatex=E5=AE=89=E8=A3=85=E7=9A=84?=
 =?UTF-8?q?=E6=A3=80=E6=B5=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/PDF_Translate.py               |  2 +-
 crazy_functions/latex_fns/latex_actions.py     |  9 ++++++++-
 crazy_functions/pdf_fns/parse_pdf_via_doc2x.py | 16 ++++++++++------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/crazy_functions/PDF_Translate.py b/crazy_functions/PDF_Translate.py
index 0f93c821..a4d10837 100644
--- a/crazy_functions/PDF_Translate.py
+++ b/crazy_functions/PDF_Translate.py
@@ -47,7 +47,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
                 yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
                 return
             except:
-                chatbot.append([None, f"DOC2X服务不可用，现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"])
+                chatbot.append([None, f"DOC2X服务不可用，请检查报错详细。{trimmed_format_exc_markdown()}"])
                 yield from update_ui(chatbot=chatbot, history=history)
 
     if method == "GROBID":
diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index ec1ec3e8..a4486e95 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -367,7 +367,14 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
     # 确定使用的编译器
     compiler = 'pdflatex'
     if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
-        compiler = 'xelatex'
+        logger.info("检测到宏包需要xelatex编译，切换至xelatex编译")
+        # Probe for the xelatex binary before selecting it, so a missing install fails early with a clear error
+        try:
+            import subprocess
+            subprocess.run(['xelatex', '--version'], capture_output=True, check=True)
+            compiler = 'xelatex'
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            raise RuntimeError("检测到需要使用xelatex编译，但系统中未安装xelatex。请先安装texlive或其他提供xelatex的LaTeX发行版。")
 
     while True:
         import os
diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index b770bdab..7d251066 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -19,8 +19,8 @@ def 状态检查(response, uid=""):
     """
     response_json = response.json()
     response_data = response_json.get("data", {})
-    code = response_data.get("code", "Unknown")
-    meg = response_data.get("message", "")
+    code = response_json.get("code", "Unknown")
+    meg = response_data.get("message", response_json)
     trace_id = response.headers.get("trace-id", "Failed to get trace-id")
     if response.status_code != 200:
         raise RuntimeError(
@@ -57,6 +57,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     res = requests.post(
         "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload",
         headers={"Authorization": "Bearer " + doc2x_api_key},
+        timeout=15,
     )
     res_data = 状态检查(res)
     upload_url = res_data["url"]
@@ -64,7 +65,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
 
     logger.info("Doc2x 上传文件：上传文件")
     with open(pdf_file_path, "rb") as file:
-        res = requests.put(upload_url, data=file)
+        res = requests.put(upload_url, data=file, timeout=60)
     res.raise_for_status()
 
     # < ------ 第2步：轮询等待 ------ >
@@ -77,6 +78,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
+            timeout=15,
         )
         res_data = 状态检查(res)
         if res_data["status"] == "success":
@@ -97,6 +99,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         "https://v2.doc2x.noedgeai.com/api/v2/convert/parse",
         headers={"Authorization": "Bearer " + doc2x_api_key},
         json=data,
+        timeout=15,
     )
     状态检查(res, uid=f"uid: {uuid}")
 
@@ -110,6 +113,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
             "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
             headers={"Authorization": "Bearer " + doc2x_api_key},
             params=params,
+            timeout=15,
         )
         res_data = 状态检查(res, uid=f"uid: {uuid}")
         if res_data["status"] == "success":
@@ -122,7 +126,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
         raise RuntimeError("Doc2x conversion timeout after maximum attempts")
 
     # < ------ 第5步：最后的处理 ------ >
-    logger.info("Doc2x 第5步：最后的处理")
+    logger.info("Doc2x 第5步：下载转换后的文件")
 
     if format == "tex":
         target_path = latex_dir
@@ -135,7 +139,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
     for attempt in range(max_attempt):
         try:
             result_url = res_data["url"]
-            res = requests.get(result_url)
+            res = requests.get(result_url, timeout=60)
             zip_path = os.path.join(target_path, gen_time_str() + ".zip")
             unzip_path = os.path.join(target_path, gen_time_str())
             if res.status_code == 200:
@@ -145,7 +149,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
                 raise RuntimeError(f"Doc2x return an error: {res.json()}")
         except Exception as e:
             if attempt < max_attempt - 1:
-                logger.error(f"Failed to download latex file, retrying... {e}")
+                logger.error(f"Failed to download uid = {uuid} file, retrying... {e}")
                 time.sleep(3)
                 continue
             else: