Bug修复以及增加对xelatex安装的检测

2024-11-29 15:42:10 +08:00 · 2024-11-29 15:42:10 +08:00 · 3a03e3f3d3
parent 05a5add8da
commit 3a03e3f3d3
3 changed files with 19 additions and 8 deletions
--- a/crazy_functions/PDF_Translate.py
+++ b/crazy_functions/PDF_Translate.py
@@ -47,7 +47,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
                yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
                return
            except:
-                chatbot.append([None, f"DOC2X服务不可用，现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"])
+                chatbot.append([None, f"DOC2X服务不可用，请检查报错详细。{trimmed_format_exc_markdown()}"])
                yield from update_ui(chatbot=chatbot, history=history)

    if method == "GROBID":
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -367,7 +367,14 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
    # 确定使用的编译器
    compiler = 'pdflatex'
    if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
-        compiler = 'xelatex'
+        logger.info("检测到宏包需要xelatex编译，切换至xelatex编译")
+        # Check if xelatex is installed
+        try:
+            import subprocess
+            subprocess.run(['xelatex', '--version'], capture_output=True, check=True)
+            compiler = 'xelatex'
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            raise RuntimeError("检测到需要使用xelatex编译，但系统中未安装xelatex。请先安装texlive或其他提供xelatex的LaTeX发行版。")

    while True:
        import os
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -19,8 +19,8 @@ def 状态检查(response, uid=""):
    """
    response_json = response.json()
    response_data = response_json.get("data", {})
-    code = response_data.get("code", "Unknown")
-    meg = response_data.get("message", "")
+    code = response_json.get("code", "Unknown")
+    meg = response_data.get("message", response_json)
    trace_id = response.headers.get("trace-id", "Failed to get trace-id")
    if response.status_code != 200:
        raise RuntimeError(
@@ -57,6 +57,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
    res = requests.post(
        "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload",
        headers={"Authorization": "Bearer " + doc2x_api_key},
+        timeout=15,
    )
    res_data = 状态检查(res)
    upload_url = res_data["url"]
@@ -64,7 +65,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):

    logger.info("Doc2x 上传文件：上传文件")
    with open(pdf_file_path, "rb") as file:
-        res = requests.put(upload_url, data=file)
+        res = requests.put(upload_url, data=file, timeout=60)
    res.raise_for_status()

    # < ------ 第2步：轮询等待 ------ >
@@ -77,6 +78,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
            "https://v2.doc2x.noedgeai.com/api/v2/parse/status",
            headers={"Authorization": "Bearer " + doc2x_api_key},
            params=params,
+            timeout=15,
        )
        res_data = 状态检查(res)
        if res_data["status"] == "success":
@@ -97,6 +99,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
        "https://v2.doc2x.noedgeai.com/api/v2/convert/parse",
        headers={"Authorization": "Bearer " + doc2x_api_key},
        json=data,
+        timeout=15,
    )
    状态检查(res, uid=f"uid: {uuid}")

@@ -110,6 +113,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
            "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
            headers={"Authorization": "Bearer " + doc2x_api_key},
            params=params,
+            timeout=15,
        )
        res_data = 状态检查(res, uid=f"uid: {uuid}")
        if res_data["status"] == "success":
@@ -122,7 +126,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
        raise RuntimeError("Doc2x conversion timeout after maximum attempts")

    # < ------ 第5步：最后的处理 ------ >
-    logger.info("Doc2x 第5步：最后的处理")
+    logger.info("Doc2x 第5步：下载转换后的文件")

    if format == "tex":
        target_path = latex_dir
@@ -135,7 +139,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
    for attempt in range(max_attempt):
        try:
            result_url = res_data["url"]
-            res = requests.get(result_url)
+            res = requests.get(result_url, timeout=60)
            zip_path = os.path.join(target_path, gen_time_str() + ".zip")
            unzip_path = os.path.join(target_path, gen_time_str())
            if res.status_code == 200:
@@ -145,7 +149,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
                raise RuntimeError(f"Doc2x return an error: {res.json()}")
        except Exception as e:
            if attempt < max_attempt - 1:
-                logger.error(f"Failed to download latex file, retrying... {e}")
+                logger.error(f"Failed to download uid = {uuid} file, retrying... {e}")
                time.sleep(3)
                continue
            else: