首页 > 教程
批量提取 Word 文档标题
- 2025-04-08
- 1297 ℃
import os
import sys
from collections import defaultdict

from docx import Document
from PyQt5.QtCore import QEventLoop, Qt
from PyQt5.QtGui import QFont, QIcon
from PyQt5.QtWidgets import (
    QApplication,
    QFileDialog,
    QGroupBox,
    QHBoxLayout,
    QLabel,
    QMainWindow,
    QProgressBar,
    QPushButton,
    QSpinBox,
    QTextEdit,
    QVBoxLayout,
    QWidget,
)
class TitleExtractorApp(QMainWindow):
    """Main window of a tool that batch-extracts headings from Word files.

    The user picks a folder and a maximum heading level.  Every ``.docx``
    file below that folder (``os.walk`` recurses into subfolders) is opened
    with python-docx, and the text of each paragraph whose style is named
    ``Heading N`` (``N <= max level``) is collected per file and per level.
    The report can be shown in the window and saved as a UTF-8 text file.

    Attributes:
        selected_folder: Folder chosen by the user, ``""`` until one is picked.
        extracted_data: Maps a document's path relative to
            ``selected_folder`` to a ``{level: [heading text, ...]}`` mapping.
            Only documents with at least one matching heading are stored.
    """

    # Qt style sheet applied to the main window and its child widgets.
    _STYLE_SHEET = """
        QMainWindow {
            background-color: #f5f5f5;
        }
        QGroupBox {
            border: 1px solid #ccc;
            border-radius: 5px;
            margin-top: 10px;
            padding-top: 15px;
            font-size: 14px;
        }
        QGroupBox::title {
            subcontrol-origin: margin;
            left: 10px;
            padding: 0 3px;
        }
        QPushButton {
            background-color: #4CAF50;
            border: none;
            color: white;
            padding: 8px 16px;
            text-align: center;
            text-decoration: none;
            font-size: 14px;
            margin: 4px 2px;
            border-radius: 4px;
        }
        QPushButton:hover {
            background-color: #45a049;
        }
        QPushButton:pressed {
            background-color: #3e8e41;
        }
        QTextEdit {
            border: 1px solid #ccc;
            border-radius: 4px;
            padding: 8px;
            font-family: 'Segoe UI', Arial, sans-serif;
        }
        QSpinBox {
            padding: 5px;
            font-size: 14px;
        }
    """

    def __init__(self):
        """Create the window, apply its style sheet and build the widgets."""
        super().__init__()
        # State is initialised before any widget exists so that no slot can
        # ever run against a missing attribute.
        self.selected_folder = ""
        self.extracted_data = {}
        self._errors = []  # per-file error messages of the last extraction

        self.setWindowTitle("Word文档标题提取工具")
        # Supply an icon file named icon.png, or remove this line.
        self.setWindowIcon(QIcon('icon.png'))
        self.setGeometry(100, 100, 800, 600)
        self.setStyleSheet(self._STYLE_SHEET)
        self.init_ui()

    def init_ui(self):
        """Build all widgets and layouts and install them in the window."""
        main_widget = QWidget()
        main_layout = QVBoxLayout()

        # Headline shown at the top of the window.
        title_label = QLabel("Word文档标题提取工具")
        title_label.setFont(QFont('Arial', 16, QFont.Bold))
        title_label.setAlignment(Qt.AlignCenter)
        title_label.setStyleSheet("color: #333; margin-bottom: 20px;")

        # "Extraction settings" group: folder picker + maximum heading level.
        settings_group = QGroupBox("提取设置")
        settings_layout = QHBoxLayout()

        # Folder selection.
        folder_layout = QVBoxLayout()
        self.folder_label = QLabel("未选择文件夹")
        self.folder_label.setStyleSheet("color: #666;")
        browse_button = QPushButton("选择文件夹")
        browse_button.clicked.connect(self.select_folder)
        folder_layout.addWidget(QLabel("文档文件夹:"))
        folder_layout.addWidget(self.folder_label)
        folder_layout.addWidget(browse_button)

        # Maximum heading level (1-6, default 3).
        level_layout = QVBoxLayout()
        level_label = QLabel("最大标题级别:")
        self.level_spin = QSpinBox()
        self.level_spin.setRange(1, 6)
        self.level_spin.setValue(3)
        level_layout.addWidget(level_label)
        level_layout.addWidget(self.level_spin)

        # Progress bar, expressed in percent.
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        self.progress_bar.setTextVisible(False)

        # The folder column gets 70% of the width, the level column 30%.
        settings_layout.addLayout(folder_layout, 70)
        settings_layout.addLayout(level_layout, 30)
        settings_group.setLayout(settings_layout)

        # Action buttons.
        button_layout = QHBoxLayout()
        extract_button = QPushButton("提取标题")
        extract_button.clicked.connect(self.extract_titles)
        extract_button.setStyleSheet("background-color: #2196F3;")
        save_button = QPushButton("保存结果")
        save_button.clicked.connect(self.save_results)
        save_button.setStyleSheet("background-color: #FF9800;")
        clear_button = QPushButton("清空结果")
        clear_button.clicked.connect(self.clear_results)
        clear_button.setStyleSheet("background-color: #f44336;")
        button_layout.addWidget(extract_button)
        button_layout.addWidget(save_button)
        button_layout.addWidget(clear_button)

        # Read-only result pane.
        result_group = QGroupBox("提取结果")
        result_layout = QVBoxLayout()
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        result_layout.addWidget(self.result_text)
        result_group.setLayout(result_layout)

        # Assemble the main layout.
        main_layout.addWidget(title_label)
        main_layout.addWidget(settings_group)
        main_layout.addWidget(self.progress_bar)
        main_layout.addLayout(button_layout)
        main_layout.addWidget(result_group)
        main_widget.setLayout(main_layout)
        self.setCentralWidget(main_widget)

        self.statusBar().showMessage("准备就绪")

    def select_folder(self):
        """Ask the user for a folder and remember it if one was chosen."""
        folder = QFileDialog.getExistingDirectory(self, "选择包含Word文档的文件夹")
        if not folder:
            # Dialog was cancelled: keep the previous selection.
            return
        self.selected_folder = folder
        self.folder_label.setText(folder)
        self.statusBar().showMessage(f"已选择文件夹: {folder}")

    @staticmethod
    def _heading_level(style_name):
        """Return ``N`` for a style named ``Heading N``, otherwise ``None``."""
        if not style_name or not style_name.startswith('Heading'):
            return None
        try:
            return int(style_name.split()[1])
        except (IndexError, ValueError):
            # e.g. "Heading" with no number, or a non-numeric second word.
            return None

    def _find_docx_files(self):
        """Return the paths of all ``.docx`` files below the selected folder."""
        found = []
        for root, _, files in os.walk(self.selected_folder):
            for name in files:
                # Match the extension case-insensitively and skip "~$..."
                # files, which Word creates as lock files next to an open
                # document and which are not readable documents.
                if name.lower().endswith('.docx') and not name.startswith('~$'):
                    found.append(os.path.join(root, name))
        return found

    def _extract_headings(self, file_path, max_level):
        """Return ``{level: [text, ...]}`` for the headings of one document.

        Raises whatever python-docx raises when the file cannot be opened.
        """
        headings = defaultdict(list)
        for paragraph in Document(file_path).paragraphs:
            level = self._heading_level(paragraph.style.name)
            if level is not None and level <= max_level:
                headings[level].append(paragraph.text)
        return headings

    def _format_results(self):
        """Return ``extracted_data`` rendered as the plain-text report."""
        chunks = []
        for name, levels in self.extracted_data.items():
            chunks.append(f"=== {name} ===\n")
            for level in sorted(levels):
                chunks.append(f"\n[标题 {level}]\n")
                chunks.extend(
                    f"{i}. {title}\n"
                    for i, title in enumerate(levels[level], 1)
                )
            chunks.append("\n")
        return "".join(chunks)

    def extract_titles(self):
        """Scan the selected folder and collect headings from every document.

        Fills ``extracted_data``, updates the progress bar after each file
        (whether it succeeded or failed) and finally shows the report.  A
        file that cannot be read is reported in the result pane and does not
        stop the run.
        """
        if not self.selected_folder:
            self.statusBar().showMessage("请先选择文件夹!", 3000)
            return

        max_level = self.level_spin.value()
        self.result_text.clear()
        self.extracted_data = {}
        self._errors = []
        self.progress_bar.setValue(0)  # drop the value left by an earlier run

        docx_paths = self._find_docx_files()
        if not docx_paths:
            self.statusBar().showMessage("所选文件夹中没有找到Word文档!", 3000)
            return

        total_files = len(docx_paths)
        processed_files = 0
        for index, file_path in enumerate(docx_paths, 1):
            # Key by relative path: bare file names collide when the same
            # name appears in several subfolders.
            rel_path = os.path.relpath(file_path, self.selected_folder)
            try:
                headings = self._extract_headings(file_path, max_level)
            except Exception as exc:
                # Deliberately broad: this is the per-file boundary, and a
                # damaged document must not abort the whole batch.
                self._errors.append(f"处理文件 {rel_path} 时出错: {exc}")
            else:
                processed_files += 1
                if headings:
                    self.extracted_data[rel_path] = headings

            self.progress_bar.setValue(index * 100 // total_files)
            # Let Qt repaint while this loop blocks the event loop; user
            # input is excluded so the slot cannot be re-entered by a click.
            QApplication.processEvents(QEventLoop.ExcludeUserInputEvents)

        self.display_results()
        self.progress_bar.setValue(100)
        self.statusBar().showMessage(f"提取完成!共处理 {processed_files} 个文件", 5000)

    def display_results(self):
        """Show the report, followed by any per-file errors, in the result pane."""
        if self.extracted_data:
            parts = [self._format_results()]
        else:
            parts = ["没有提取到任何标题数据\n"]
        parts.extend(f"{message}\n" for message in self._errors)
        # setPlainText shows heading text literally; append() may interpret
        # text that looks like HTML as rich text.
        self.result_text.setPlainText("".join(parts))

    def save_results(self):
        """Write the report to a UTF-8 text file chosen by the user."""
        if not self.extracted_data:
            self.statusBar().showMessage("没有可保存的数据!", 3000)
            return

        file_path, _ = QFileDialog.getSaveFileName(self, "保存结果", "", "文本文件 (*.txt)")
        if not file_path:
            # Dialog was cancelled.
            return

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(self._format_results())
        except (OSError, UnicodeError) as exc:
            self.statusBar().showMessage(f"保存失败: {exc}", 5000)
        else:
            self.statusBar().showMessage(f"结果已保存到: {file_path}", 5000)

    def clear_results(self):
        """Discard the extracted data and reset the result pane and progress bar."""
        self.result_text.clear()
        self.extracted_data = {}
        self._errors = []
        self.progress_bar.setValue(0)
        self.statusBar().showMessage("已清空结果", 3000)
if __name__ == "__main__":
    # Script entry point: create the Qt application object first, since
    # widgets cannot be constructed without it.
    app = QApplication(sys.argv)

    # Application-wide default font.
    default_font = QFont()
    default_font.setFamily("Segoe UI")
    default_font.setPointSize(10)
    app.setFont(default_font)

    # Build and show the main window.
    main_window = TitleExtractorApp()
    main_window.show()
    sys.exit(app.exec_())
下一篇:word文档提取目录
相关内容
写代码时容易用到的基础...
批量重命名文件,怎样快...
Coze 扣子 - 字节出品...
mac 80个使用技巧合集
语音转文字,文字转语音...
可免费使用的网站CDN加速服务
PHP判断URL的合法性字符...
php使用header()函数导出excel表格
-
Nginx域名跳转 www跳转和不带www
2024-04-23 1677
-
你选择产品视角还是用户视角?
2024-05-29 1272
-
3个DeepSeek隐藏玩法,99%的人都不知道
2025-04-30 984
-
最全ComfyUI资源整合:教程、插件、工作流(可免费体验)
2025-03-04 1568
-
知网文献免费下载、论文免费查重和去重的方法
2025-03-10 1313
-
如何使用php与数据库进行交互
2024-03-04 1259
-
jquery获取当前年月日时间和星期
2021-06-02 759
-
有没有好用的清理手机垃圾的软件呢
2025-03-10 1573
-
Ant Design Vue 4.0 a-modal弹窗组件封装
2025-04-07 975
-
不备案可以在微信中直接打开的域名后缀
2024-03-04 2834
文章评论 (0)
- 这篇文章还没有收到评论,赶紧来抢沙发吧~


进入有缘空间
点击分享文章