添加doc2x超时设置并添加对xelatex编译的支持
This commit is contained in:
parent
c49e896082
commit
05a5add8da
|
|
@ -342,7 +342,6 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
|
||||||
logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
||||||
return False, -1, [-1]
|
return False, -1, [-1]
|
||||||
|
|
||||||
|
|
||||||
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
|
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
|
||||||
import os, time
|
import os, time
|
||||||
n_fix = 1
|
n_fix = 1
|
||||||
|
|
@ -351,6 +350,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||||
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
|
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
|
||||||
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
|
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
|
||||||
yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
|
||||||
|
# 检查是否需要使用xelatex
|
||||||
|
def check_if_need_xelatex(tex_path, max_chars=5000):
    """Heuristically decide whether a LaTeX source should be compiled with xelatex.

    The beginning of the file is scanned for the names of packages that
    only work with a XeTeX-based engine. The check is a plain substring
    match, so a name that appears inside a comment also counts.

    Args:
        tex_path: Path of the ``.tex`` file to inspect.
        max_chars: Number of leading characters to scan. Defaults to 5000;
            pass ``None`` to scan the whole file, which helps when a long
            header pushes the package declarations further down.

    Returns:
        True if any marker package name occurs in the scanned text,
        otherwise False. False is also returned when the file cannot be
        opened or read.
    """
    xelatex_markers = ('fontspec', 'xeCJK', 'xetex', 'unicode-math', 'xltxtra', 'xunicode')
    try:
        # errors='replace' keeps undecodable bytes from aborting the scan.
        with open(tex_path, 'r', encoding='utf-8', errors='replace') as f:
            head = f.read(max_chars)
    except (OSError, ValueError):
        # Best effort: an unreadable or invalid path means "no xelatex needed".
        return False
    return any(marker in head for marker in xelatex_markers)
|
||||||
|
|
||||||
|
# 根据编译器类型返回编译命令
|
||||||
|
def get_compile_command(compiler, filename):
    """Build the batch-mode command line that compiles ``<filename>.tex``.

    Args:
        compiler: Name of the LaTeX engine executable, e.g. ``pdflatex``
            or ``xelatex``.
        filename: Name of the source file without the ``.tex`` extension.

    Returns:
        The command as a single string.
    """
    engine_flags = '-interaction=batchmode -file-line-error'
    return ' '.join([str(compiler), engine_flags, str(filename) + '.tex'])
|
||||||
|
|
||||||
|
# 确定使用的编译器
|
||||||
|
compiler = 'pdflatex'
|
||||||
|
if check_if_need_xelatex(pj(work_folder_modified, f'{main_file_modified}.tex')):
|
||||||
|
compiler = 'xelatex'
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
import os
|
import os
|
||||||
|
|
@ -361,10 +378,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||||
|
|
||||||
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
|
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||||
|
|
||||||
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
|
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
|
||||||
# 只有第二步成功,才能继续下面的步骤
|
# 只有第二步成功,才能继续下面的步骤
|
||||||
|
|
@ -375,10 +392,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||||
ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
|
ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_original), work_folder_original)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, main_file_modified), work_folder_modified)
|
||||||
|
|
||||||
if mode!='translate_zh':
|
if mode!='translate_zh':
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
|
|
@ -386,10 +403,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||||
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
|
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||||
ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
|
ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
ok = compile_latex_with_timeout(get_compile_command(compiler, 'merge_diff'), work_folder)
|
||||||
|
|
||||||
# <---------- 检查结果 ----------->
|
# <---------- 检查结果 ----------->
|
||||||
results_ = ""
|
results_ = ""
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
||||||
# < ------ 第2步:轮询等待 ------ >
|
# < ------ 第2步:轮询等待 ------ >
|
||||||
logger.info("Doc2x 处理文件中:轮询等待")
|
logger.info("Doc2x 处理文件中:轮询等待")
|
||||||
params = {"uid": uuid}
|
params = {"uid": uuid}
|
||||||
while True:
|
max_attempts = 60
|
||||||
|
attempt = 0
|
||||||
|
while attempt < max_attempts:
|
||||||
res = requests.get(
|
res = requests.get(
|
||||||
"https://v2.doc2x.noedgeai.com/api/v2/parse/status",
|
"https://v2.doc2x.noedgeai.com/api/v2/parse/status",
|
||||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||||
|
|
@ -82,8 +84,11 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
||||||
elif res_data["status"] == "processing":
|
elif res_data["status"] == "processing":
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
logger.info(f"Doc2x is processing at {res_data['progress']}%")
|
logger.info(f"Doc2x is processing at {res_data['progress']}%")
|
||||||
|
attempt += 1
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Doc2x return an error: {res_data}")
|
raise RuntimeError(f"Doc2x return an error: {res_data}")
|
||||||
|
if attempt >= max_attempts:
|
||||||
|
raise RuntimeError("Doc2x processing timeout after maximum attempts")
|
||||||
|
|
||||||
# < ------ 第3步:提交转化 ------ >
|
# < ------ 第3步:提交转化 ------ >
|
||||||
logger.info("Doc2x 第3步:提交转化")
|
logger.info("Doc2x 第3步:提交转化")
|
||||||
|
|
@ -98,7 +103,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
||||||
# < ------ 第4步:等待结果 ------ >
|
# < ------ 第4步:等待结果 ------ >
|
||||||
logger.info("Doc2x 第4步:等待结果")
|
logger.info("Doc2x 第4步:等待结果")
|
||||||
params = {"uid": uuid}
|
params = {"uid": uuid}
|
||||||
while True:
|
max_attempts = 36
|
||||||
|
attempt = 0
|
||||||
|
while attempt < max_attempts:
|
||||||
res = requests.get(
|
res = requests.get(
|
||||||
"https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
|
"https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result",
|
||||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||||
|
|
@ -110,6 +117,9 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
||||||
elif res_data["status"] == "processing":
|
elif res_data["status"] == "processing":
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
logger.info("Doc2x still processing to convert file")
|
logger.info("Doc2x still processing to convert file")
|
||||||
|
attempt += 1
|
||||||
|
if attempt >= max_attempts:
|
||||||
|
raise RuntimeError("Doc2x conversion timeout after maximum attempts")
|
||||||
|
|
||||||
# < ------ 第5步:最后的处理 ------ >
|
# < ------ 第5步:最后的处理 ------ >
|
||||||
logger.info("Doc2x 第5步:最后的处理")
|
logger.info("Doc2x 第5步:最后的处理")
|
||||||
|
|
@ -124,7 +134,7 @@ def 解析PDF_DOC2X(pdf_file_path, format="tex"):
|
||||||
# < ------ 下载 ------ >
|
# < ------ 下载 ------ >
|
||||||
for attempt in range(max_attempt):
|
for attempt in range(max_attempt):
|
||||||
try:
|
try:
|
||||||
result_url = res_json["data"]["url"]
|
result_url = res_data["url"]
|
||||||
res = requests.get(result_url)
|
res = requests.get(result_url)
|
||||||
zip_path = os.path.join(target_path, gen_time_str() + ".zip")
|
zip_path = os.path.join(target_path, gen_time_str() + ".zip")
|
||||||
unzip_path = os.path.join(target_path, gen_time_str())
|
unzip_path = os.path.join(target_path, gen_time_str())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue