diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index df5135bb..ec1ec3e8 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -342,7 +342,6 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") return False, -1, [-1] - def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'): import os, time n_fix = 1 @@ -351,6 +350,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面 yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面 + # 检查是否需要使用xelatex + def check_if_need_xelatex(tex_path): + try: + with open(tex_path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read(5000) + # 检查是否有使用xelatex的宏包 + return any(pkg in content for pkg in ['fontspec', 'xeCJK', 'xetex', 'unicode-math', 'xltxtra', 'xunicode']) + except Exception: + return False + + # 根据编译器类型返回编译命令 + def get_compile_command(compiler, filename): + return f'{compiler} -interaction=batchmode -file-line-error {filename}.tex' + + # 确定使用的编译器 + compiler = 'pdflatex' + if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')): + compiler = 'xelatex' while True: import os @@ -361,10 +378,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified) if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): # 只有第二步成功,才能继续下面的步骤 @@ -375,10 +392,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original) + ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified) if mode!='translate_zh': yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 @@ -386,10 +403,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd()) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) + ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder) ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder) - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) - ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) + ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder) + ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder) # <---------- 检查结果 -----------> results_ = "" diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py index 3a37a530..b770bdab 100644 --- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py +++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py @@ -70,7 +70,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): # < ------ 第2步:轮询等待 ------ > logger.info("Doc2x 处理文件中:轮询等待") params = {"uid": uuid} - while True: + max_attempts = 60 + attempt = 0 + while attempt < max_attempts: res = requests.get( "https://v2.doc2x.noedgeai.com/api/v2/parse/status", headers={"Authorization": "Bearer " + doc2x_api_key}, @@ -82,8 +84,11 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): elif res_data["status"] == "processing": time.sleep(5) logger.info(f"Doc2x is processing at {res_data['progress']}%") + attempt += 1 else: raise RuntimeError(f"Doc2x return an error: {res_data}") + if attempt >= max_attempts: + raise RuntimeError("Doc2x processing timeout after maximum attempts") # < ------ 第3步:提交转化 ------ > logger.info("Doc2x 第3步:提交转化") @@ -98,7 +103,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): # < ------ 第4步:等待结果 ------ > logger.info("Doc2x 第4步:等待结果") params = {"uid": uuid} - while True: + max_attempts = 36 + attempt = 0 + while attempt < max_attempts: res = requests.get( "https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result", headers={"Authorization": "Bearer " + doc2x_api_key}, @@ -110,6 +117,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): elif res_data["status"] == "processing": time.sleep(3) logger.info("Doc2x still processing to convert file") + attempt += 1 + if attempt >= max_attempts: + raise RuntimeError("Doc2x conversion timeout after maximum attempts") # < ------ 第5步:最后的处理 ------ > logger.info("Doc2x 第5步:最后的处理") @@ -124,7 +134,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"): # < ------ 下载 ------ > for attempt in range(max_attempt): try: - result_url = res_json["data"]["url"] + result_url = res_data["url"] res = requests.get(result_url) zip_path = os.path.join(target_path, gen_time_str() + ".zip") unzip_path = os.path.join(target_path, gen_time_str())