Spaces:
Sleeping
Sleeping
| import os | |
| from PyPDF2 import PdfReader, PdfWriter | |
| from io import BytesIO | |
| import gradio as gr | |
def get_pdf_size(pdf_writer):
    """Return the serialized size of ``pdf_writer`` in bytes.

    The writer is asked to write itself into a throwaway in-memory stream;
    the stream position after that write is reported as the size.
    """
    with BytesIO() as scratch:
        pdf_writer.write(scratch)
        size_in_bytes = scratch.tell()
    return size_in_bytes
def _write_page_range(reader, first_page, last_page, output_pdf_path):
    """Write reader pages first_page..last_page (0-based, inclusive) to a new PDF file."""
    writer = PdfWriter()
    for page_num in range(first_page, last_page + 1):
        writer.add_page(reader.pages[page_num])
    with open(output_pdf_path, "wb") as output_pdf:
        writer.write(output_pdf)


def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """Split a PDF into consecutive parts that end at the given page numbers.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory that receives ``part_1.pdf``, ``part_2.pdf``, ...
            It is created if it does not exist.
        split_pages: Iterable of 1-based page numbers. Each number is the last
            page of one part; duplicates are ignored and order does not matter.

    Returns:
        A list with the paths of the written files, in page order. Pages after
        the last split point are written as one extra trailing part.
        If any page number is below 1 or above the page count, nothing is
        written and an error message string is returned instead.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # Reject split points outside the document before writing anything.
    if any(page > total_pages or page < 1 for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # Sorted, de-duplicated 1-based end pages of each part.
    boundaries = sorted(set(split_pages))

    # Pages left after the last split point form one more part; treating the
    # final page as an extra boundary lets the same loop write it.
    last_boundary = boundaries[-1] if boundaries else 0
    if last_boundary < total_pages:
        boundaries.append(total_pages)

    result_files = []
    start_page = 0  # 0-based index of the first page of the next part
    for part_number, boundary in enumerate(boundaries, start=1):
        end_page = boundary - 1  # reader.pages is indexed from 0
        output_pdf_path = os.path.join(output_folder, f"part_{part_number}.pdf")
        _write_page_range(reader, start_page, end_page, output_pdf_path)
        result_files.append(output_pdf_path)
        start_page = end_page + 1

    return result_files
def _writer_for_pages(reader, page_numbers):
    """Return a new PdfWriter holding the reader pages at the given 0-based indices."""
    writer = PdfWriter()
    for page_num in page_numbers:
        writer.add_page(reader.pages[page_num])
    return writer


def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """Split a PDF into consecutive parts that stay within a size limit.

    Pages are appended to the current part one at a time. When appending a
    page makes the serialized part larger than the limit, the part is saved
    WITHOUT that page and the page opens the next part, so a saved part only
    exceeds the limit when a single page is by itself larger than the limit
    (such a page is written alone).

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory that receives ``part_1.pdf``, ``part_2.pdf``, ...
            It is created if it does not exist.
        max_size_mb: Size limit per part in megabytes (1 MB = 1024 * 1024 bytes).

    Returns:
        A list with the paths of the written files, in page order.
    """
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024  # MB -> bytes
    result_files = []

    def save_part(part_writer):
        # Parts are numbered in the order they are written.
        output_pdf_path = os.path.join(
            output_folder, f"part_{len(result_files) + 1}.pdf"
        )
        with open(output_pdf_path, "wb") as output_pdf:
            part_writer.write(output_pdf)
        result_files.append(output_pdf_path)

    writer = PdfWriter()
    part_pages = []  # 0-based indices of the pages currently held by `writer`

    for page_num in range(total_pages):
        writer.add_page(reader.pages[page_num])
        part_pages.append(page_num)

        # A one-page part cannot be made smaller, so skip the size probe.
        # NOTE: get_pdf_size serializes the whole part on every call, so the
        # total work grows with the square of the pages per part.
        if len(part_pages) == 1 or get_pdf_size(writer) <= max_size_bytes:
            continue

        # The page just added pushed the part over the limit: save the part
        # without it and start the next part with that page.
        part_pages.pop()
        save_part(_writer_for_pages(reader, part_pages))
        part_pages = [page_num]
        writer = _writer_for_pages(reader, part_pages)

    # Save whatever is left in the last, not yet flushed part.
    if part_pages:
        save_part(writer)

    return result_files
def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
    """Validate the form input and dispatch to the matching split function.

    Args:
        input_pdf: Path of the uploaded PDF.
        mode: ``"按分页数分割"`` (split at page numbers) or
            ``"按文件大小分割"`` (split by size).
        split_pages: Comma-separated 1-based page numbers, e.g. ``"3,5,10"``.
            Used only in page mode. Blank items and surrounding whitespace are
            ignored, and the full-width comma is accepted as a separator.
        max_size_mb: Size limit per part in MB. Used only in size mode.

    Returns:
        The list of generated file paths on success, otherwise an error
        message string (invalid input is reported, never raised).
        NOTE(review): this function is bound to a ``gr.Files`` output, which
        presumably cannot display a plain message string - confirm whether
        errors should be surfaced through Gradio's own error mechanism.
    """
    # The split functions create this folder themselves when they run.
    output_folder = "output_parts"

    if not input_pdf:
        return "错误:请上传PDF文件。"

    if mode == "按分页数分割":
        if not split_pages:
            return "错误:请输入分页数。"
        # Accept the full-width comma too, and tolerate blanks such as "3,5,".
        tokens = split_pages.replace(",", ",").split(",")
        try:
            page_numbers = [int(token) for token in tokens if token.strip()]
        except ValueError:
            return "错误:分页数必须是以逗号分隔的整数。"
        if not page_numbers:
            return "错误:请输入分页数。"
        # May itself return an error string (page numbers out of range).
        return split_pdf_by_pages(input_pdf, output_folder, page_numbers)

    if mode == "按文件大小分割":
        if not max_size_mb:
            return "错误:请输入最大文件大小。"
        try:
            size_limit_mb = float(max_size_mb)
        except (TypeError, ValueError):
            return "错误:最大文件大小必须是数字。"
        if size_limit_mb <= 0:
            return "错误:最大文件大小必须大于0。"
        return split_pdf_by_size(input_pdf, output_folder, size_limit_mb)

    return "错误:无效的模式。"
# Gradio interface
# The theme has to be handed to gr.Blocks to take effect; assigning it to a
# variable after the layout is built does not apply it.
theme = gr.themes.Soft()

with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# PDF 分割工具")
    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        mode = gr.Radio(choices=["按分页数分割", "按文件大小分割"], label="选择分割模式")
    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)
    with gr.Row():
        output_files = gr.Files(label="分割后的文件")
    with gr.Row():
        submit_btn = gr.Button("开始分割")
        download_all_btn = gr.Button("批量下载")

    def toggle_inputs(mode):
        """Show only the input that belongs to the selected split mode.

        Returns updates for (page-number textbox, size-limit number box).
        """
        by_pages = mode == "按分页数分割"
        return gr.Textbox(visible=by_pages), gr.Number(visible=not by_pages)

    def download_all_files(file_list):
        """Bundle the generated files into one temporary zip archive.

        Returns the path of the zip file, or None when there is nothing to
        bundle. The archive is created with delete=False, so this function
        does not remove it.
        """
        if not file_list:
            return None

        import tempfile
        import zipfile

        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_zip:
            # Write through the already-open handle instead of reopening the
            # file by name, which fails on platforms that lock open files.
            with zipfile.ZipFile(temp_zip, "w") as zf:
                for entry in file_list:
                    # Entries are used as paths; an object exposing ``.name``
                    # (presumably a file wrapper from some Gradio versions -
                    # TODO confirm) is unwrapped to its path first.
                    file_path = getattr(entry, "name", entry)
                    # Store each file under its bare file name.
                    zf.write(file_path, os.path.basename(file_path))
        return temp_zip.name

    # Switch the visible input whenever the split mode changes.
    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    # Run the split when the submit button is clicked.
    submit_btn.click(
        process_pdf,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files,
    )

    # Zip all generated files for a single download.
    download_all_btn.click(
        download_all_files,
        inputs=[output_files],
        outputs=gr.File(label="下载所有文件"),
    )

# Start the app
demo.launch()