Upload app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,19 @@ import gradio as gr
|
|
2 |
import re
|
3 |
from docx import Document
|
4 |
from docx.shared import Cm, Pt
|
|
|
|
|
|
|
5 |
import tempfile
|
6 |
import os
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def format_docx(file, chapter_keywords):
|
9 |
"""
|
10 |
處理上傳的 Word 文件
|
@@ -19,7 +29,6 @@ def format_docx(file, chapter_keywords):
|
|
19 |
doc = Document(file.name)
|
20 |
|
21 |
# 確保有 Heading 1 樣式
|
22 |
-
from docx.enum.style import WD_STYLE_TYPE
|
23 |
styles = doc.styles
|
24 |
if 'Heading 1' not in styles:
|
25 |
heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
|
@@ -61,11 +70,17 @@ def format_docx(file, chapter_keywords):
|
|
61 |
heading.paragraph_format.line_spacing = 1.0
|
62 |
heading.paragraph_format.left_indent = Cm(0)
|
63 |
heading.paragraph_format.first_line_indent = Cm(0)
|
|
|
64 |
for run in heading.runs:
|
65 |
run.font.name = '新細明體'
|
66 |
run.font.size = Pt(16)
|
67 |
elif content_type == 'paragraph':
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
clean_text = ' '.join(lines)
|
70 |
para = doc.add_paragraph(clean_text)
|
71 |
para.paragraph_format.space_before = Cm(0)
|
@@ -118,38 +133,6 @@ def create_interface():
|
|
118 |
interactive=False
|
119 |
)
|
120 |
|
121 |
-
gr.HTML("""<div style="margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
|
122 |
-
<h3>🛠️ 處理功能說明</h3>
|
123 |
-
<ul>
|
124 |
-
<li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
|
125 |
-
<li><strong>刪除多餘換行:</strong>自動合併連續的空行,避免過多空白</li>
|
126 |
-
<li><strong>首行縮排:</strong>每個段落首行縮排 2 個字元位置</li>
|
127 |
-
<li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
|
128 |
-
<li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式(無縮排)</li>
|
129 |
-
<li><strong>分頁設定:</strong>每個章節前自動分頁</li>
|
130 |
-
</ul>
|
131 |
-
</div>""")
|
132 |
-
|
133 |
-
gr.HTML("""<div style="margin-top: 20px; padding: 20px; background-color: #e8f4f8; border-radius: 10px;">
|
134 |
-
<h3>💡 使用範例</h3>
|
135 |
-
<p><strong>章節關鍵字設定:</strong></p>
|
136 |
-
<ul>
|
137 |
-
<li>小說:<code>章,節</code></li>
|
138 |
-
<li>漫畫:<code>話,回</code></li>
|
139 |
-
<li>論文:<code>章,節,段</code></li>
|
140 |
-
<li>劇本:<code>幕,場</code></li>
|
141 |
-
</ul>
|
142 |
-
<p><strong>支援的章節格式:</strong></p>
|
143 |
-
<ul>
|
144 |
-
<li>第一章、第二章、第三章...</li>
|
145 |
-
<li>第1話、第2話、第3話...</li>
|
146 |
-
<li>第一節、第二節、第三節...</li>
|
147 |
-
<li>第一幕、第二幕、第三幕...</li>
|
148 |
-
<li>第I章、第II章、第III章...</li>
|
149 |
-
<li>第二十三章、第三十四節...</li>
|
150 |
-
</ul>
|
151 |
-
</div>""")
|
152 |
-
|
153 |
process_btn.click(
|
154 |
fn=format_docx,
|
155 |
inputs=[file_input, chapter_input],
|
|
|
2 |
import re
|
3 |
from docx import Document
|
4 |
from docx.shared import Cm, Pt
|
5 |
+
from docx.enum.style import WD_STYLE_TYPE
|
6 |
+
from docx.oxml import OxmlElement
|
7 |
+
from docx.oxml.ns import qn
|
8 |
import tempfile
|
9 |
import os
|
10 |
|
11 |
+
def set_outline_level(paragraph, level: int = 0):
    """Set the outline level (``w:outlineLvl``) on a paragraph's properties.

    The value is written directly into the paragraph's ``w:pPr`` element.
    Calling this repeatedly on the same paragraph updates the existing
    ``w:outlineLvl`` element instead of adding another one.

    Args:
        paragraph: A python-docx paragraph object exposing ``_p`` (the
            underlying ``w:p`` element).
        level: Zero-based outline level written to ``w:val``. WordprocessingML
            allows 0-9, where 0 is the top heading level and 9 means
            "no outline level" (body text).

    Raises:
        ValueError: If ``level`` is outside the 0-9 range.
    """
    if not 0 <= level <= 9:
        raise ValueError(f"outline level must be between 0 and 9, got {level!r}")

    pPr = paragraph._p.get_or_add_pPr()

    # Reuse an existing element so repeated calls never leave duplicate
    # w:outlineLvl children, which makes the paragraph properties invalid.
    outline = pPr.find(qn('w:outlineLvl'))
    if outline is None:
        outline = OxmlElement('w:outlineLvl')
        # Children of w:pPr form an ordered sequence in the schema; these are
        # the elements that must come AFTER w:outlineLvl. Insert before the
        # first one present, otherwise the element belongs at the end.
        for successor in ('w:divId', 'w:cnfStyle', 'w:rPr', 'w:sectPr', 'w:pPrChange'):
            anchor = pPr.find(qn(successor))
            if anchor is not None:
                anchor.addprevious(outline)
                break
        else:
            pPr.append(outline)

    outline.set(qn('w:val'), str(level))
|
17 |
+
|
18 |
def format_docx(file, chapter_keywords):
|
19 |
"""
|
20 |
處理上傳的 Word 文件
|
|
|
29 |
doc = Document(file.name)
|
30 |
|
31 |
# 確保有 Heading 1 樣式
|
|
|
32 |
styles = doc.styles
|
33 |
if 'Heading 1' not in styles:
|
34 |
heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
|
|
|
70 |
heading.paragraph_format.line_spacing = 1.0
|
71 |
heading.paragraph_format.left_indent = Cm(0)
|
72 |
heading.paragraph_format.first_line_indent = Cm(0)
|
73 |
+
set_outline_level(heading, 0) # 強制指定為層級 1
|
74 |
for run in heading.runs:
|
75 |
run.font.name = '新細明體'
|
76 |
run.font.size = Pt(16)
|
77 |
elif content_type == 'paragraph':
|
78 |
+
# 處理多行段落:分行再合併為單一段落
|
79 |
+
lines = []
|
80 |
+
for line in text.splitlines():
|
81 |
+
stripped = line.strip()
|
82 |
+
if stripped:
|
83 |
+
lines.append(stripped)
|
84 |
clean_text = ' '.join(lines)
|
85 |
para = doc.add_paragraph(clean_text)
|
86 |
para.paragraph_format.space_before = Cm(0)
|
|
|
133 |
interactive=False
|
134 |
)
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
process_btn.click(
|
137 |
fn=format_docx,
|
138 |
inputs=[file_input, chapter_input],
|