添加doc2x超时设置并添加对xelatex编译的支持
This commit is contained in:
parent
c49e896082
commit
05a5add8da
|
|
@ -342,7 +342,6 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
|
|||
logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
||||
return False, -1, [-1]
|
||||
|
||||
|
||||
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
|
||||
import os, time
|
||||
n_fix = 1
|
||||
|
|
@ -351,6 +350,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
|
||||
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
|
||||
yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
|
||||
# 检查是否需要使用xelatex
|
||||
def check_if_need_xelatex(tex_path):
|
||||
try:
|
||||
with open(tex_path, 'r', encoding='utf-8', errors='replace') as f:
|
||||
content = f.read(5000)
|
||||
# 检查是否有使用xelatex的宏包
|
||||
return any(pkg in content for pkg in ['fontspec', 'xeCJK', 'xetex', 'unicode-math', 'xltxtra', 'xunicode'])
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# 根据编译器类型返回编译命令
|
||||
def get_compile_command(compiler, filename):
|
||||
return f'{compiler} -interaction=batchmode -file-line-error {filename}.tex'
|
||||
|
||||
# 确定使用的编译器
|
||||
compiler = 'pdflatex'
|
||||
if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
|
||||
compiler = 'xelatex'
|
||||
|
||||
while True:
|
||||
import os
|
||||
|
|
@ -361,10 +378,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||
|
||||
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||
|
||||
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
|
||||
# 只有第二步成功,才能继续下面的步骤
|
||||
|
|
@ -375,10 +392,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||
ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
|
||||
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||
|
||||
if mode!='translate_zh':
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
||||
|
|
@ -386,10 +403,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
|
||||
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||
ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||
|
||||
# <---------- 检查结果 ----------->
|
||||
results_ = ""
|
||||
|
|
|
|||
|
|
@ -70,7 +70,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
|||
# < ------ 第2步:轮询等待 ------ >
|
||||
logger.info("Doc2x 处理文件中:轮询等待")
|
||||
params = {"uid": uuid}
|
||||
while True:
|
||||
max_attempts = 60
|
||||
attempt = 0
|
||||
while attempt < max_attempts:
|
||||
res = requests.get(
|
||||
"https://v2.doc2x.noedgeai.com/api/v2/parse/status",
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
|
|
@ -82,8 +84,11 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
|||
elif res_data["status"] == "processing":
|
||||
time.sleep(5)
|
||||
logger.info(f"Doc2x is processing at {res_data['progress']}%")
|
||||
attempt += 1
|
||||
else:
|
||||
raise RuntimeError(f"Doc2x return an error: {res_data}")
|
||||
if attempt >= max_attempts:
|
||||
raise RuntimeError("Doc2x processing timeout after maximum attempts")
|
||||
|
||||
# < ------ 第3步:提交转化 ------ >
|
||||
logger.info("Doc2x 第3步:提交转化")
|
||||
|
|
@ -98,7 +103,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
|||
# < ------ 第4步:等待结果 ------ >
|
||||
logger.info("Doc2x 第4步:等待结果")
|
||||
params = {"uid": uuid}
|
||||
while True:
|
||||
max_attempts = 36
|
||||
attempt = 0
|
||||
while attempt < max_attempts:
|
||||
res = requests.get(
|
||||
"https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
|
|
@ -110,6 +117,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
|||
elif res_data["status"] == "processing":
|
||||
time.sleep(3)
|
||||
logger.info("Doc2x still processing to convert file")
|
||||
attempt += 1
|
||||
if attempt >= max_attempts:
|
||||
raise RuntimeError("Doc2x conversion timeout after maximum attempts")
|
||||
|
||||
# < ------ 第5步:最后的处理 ------ >
|
||||
logger.info("Doc2x 第5步:最后的处理")
|
||||
|
|
@ -124,7 +134,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
|||
# < ------ 下载 ------ >
|
||||
for attempt in range(max_attempt):
|
||||
try:
|
||||
result_url = res_json["data"]["url"]
|
||||
result_url = res_data["url"]
|
||||
res = requests.get(result_url)
|
||||
zip_path = os.path.join(target_path, gen_time_str() + ".zip")
|
||||
unzip_path = os.path.join(target_path, gen_time_str())
|
||||
|
|
|
|||
Loading…
Reference in New Issue