From 3a03e3f3d3ca288cf6318d93030a92038fa639d3 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Fri, 29 Nov 2024 15:42:10 +0800 Subject: [PATCH] =?UTF-8?q?Bug=E4=BF=AE=E5=A4=8D=E4=BB=A5=E5=8F=8A?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9xelatex=E5=AE=89=E8=A3=85=E7=9A=84?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/PDF_Translate.py | 2 +- crazy_functions/latex_fns/latex_actions.py | 9 ++++++++- crazy_functions/pdf_fns/parse_pdf_via_doc2x.py | 16 ++++++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/crazy_functions/PDF_Translate.py b/crazy_functions/PDF_Translate.py index 0f93c821..a4d10837 100644 --- a/crazy_functions/PDF_Translate.py +++ b/crazy_functions/PDF_Translate.py @@ -47,7 +47,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst yield from 解析PDF_基于DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request) return except: - chatbot.append([None, f"DOC2X服务不可用,现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"]) + chatbot.append([None, f"DOC2X服务不可用,请检查报错详细。{trimmed_format_exc_markdown()}"]) yield from update_ui(chatbot=chatbot, history=history) if method == "GROBID": diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index ec1ec3e8..a4486e95 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -367,7 +367,14 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f # 确定使用的编译器 compiler = 'pdflatex' if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')): - compiler = 'xelatex' + logger.info("检测到宏包需要xelatex编译,切换至xelatex编译") + # Check if xelatex is installed + try: + import subprocess + subprocess.run(['xelatex', '--version'], capture_output=True, check=True) + compiler = 'xelatex' + except (subprocess.CalledProcessError, FileNotFoundError): + raise RuntimeError("检测到需要使用xelatex编译,但系统中未安装xelatex。请先安装texlive或其他提供xelatex的LaTeX发行版。") while True: import os diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py index b770bdab..7d251066 100644 --- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py +++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py @@ -19,8 +19,8 @@ def 状态检查(response, uid=""): """ response_json = response.json() response_data = response_json.get("data", {}) - code = response_data.get("code", "Unknown") - meg = response_data.get("message", "") + code = response_json.get("code", "Unknown") + meg = response_data.get("message", response_json) trace_id = response.headers.get("trace-id", "Failed to get trace-id") if response.status_code != 200: raise RuntimeError( @@ -57,6 +57,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): res = requests.post( "https://v2.doc2x.noedgeai.com/api/v2/parse/preupload", headers={"Authorization": "Bearer " + doc2x_api_key}, + timeout=15, ) res_data = 状态检查(res) upload_url = res_data["url"] @@ -64,7 +65,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): logger.info("Doc2x 上传文件:上传文件") with open(pdf_file_path, "rb") as file: - res = requests.put(upload_url, data=file) + res = requests.put(upload_url, data=file, timeout=60) res.raise_for_status() # < ------ 第2步:轮询等待 ------ > @@ -77,6 +78,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): "https://v2.doc2x.noedgeai.com/api/v2/parse/status", headers={"Authorization": "Bearer " + doc2x_api_key}, params=params, + timeout=15, ) res_data = 状态检查(res) if res_data["status"] == "success": @@ -97,6 +99,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): "https://v2.doc2x.noedgeai.com/api/v2/convert/parse", headers={"Authorization": "Bearer " + doc2x_api_key}, json=data, + timeout=15, ) 状态检查(res, uid=f"uid: {uuid}") @@ -110,6 +113,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result", headers={"Authorization": "Bearer " + doc2x_api_key}, params=params, + timeout=15, ) res_data = 状态检查(res, uid=f"uid: {uuid}") if res_data["status"] == "success": @@ -122,7 +126,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): raise RuntimeError("Doc2x conversion timeout after maximum attempts") # < ------ 第5步:最后的处理 ------ > - logger.info("Doc2x 第5步:最后的处理") + logger.info("Doc2x 第5步:下载转换后的文件") if format == "tex": target_path = latex_dir @@ -135,7 +139,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): for attempt in range(max_attempt): try: result_url = res_data["url"] - res = requests.get(result_url) + res = requests.get(result_url, timeout=60) zip_path = os.path.join(target_path, gen_time_str() + ".zip") unzip_path = os.path.join(target_path, gen_time_str()) if res.status_code == 200: @@ -145,7 +149,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): raise RuntimeError(f"Doc2x return an error: {res.json()}") except Exception as e: if attempt < max_attempt - 1: - logger.error(f"Failed to download latex file, retrying... {e}") + logger.error(f"Failed to download uid = {uuid} file, retrying... {e}") time.sleep(3) continue else: