dseditor committed on
Commit
0b70552
·
verified ·
1 Parent(s): b14cac5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -34
app.py CHANGED
@@ -2,9 +2,19 @@ import gradio as gr
2
  import re
3
  from docx import Document
4
  from docx.shared import Cm, Pt
 
 
 
5
  import tempfile
6
  import os
7
 
 
 
 
 
 
 
 
8
  def format_docx(file, chapter_keywords):
9
  """
10
  處理上傳的 Word 文件
@@ -19,7 +29,6 @@ def format_docx(file, chapter_keywords):
19
  doc = Document(file.name)
20
 
21
  # 確保有 Heading 1 樣式
22
- from docx.enum.style import WD_STYLE_TYPE
23
  styles = doc.styles
24
  if 'Heading 1' not in styles:
25
  heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
@@ -61,11 +70,17 @@ def format_docx(file, chapter_keywords):
61
  heading.paragraph_format.line_spacing = 1.0
62
  heading.paragraph_format.left_indent = Cm(0)
63
  heading.paragraph_format.first_line_indent = Cm(0)
 
64
  for run in heading.runs:
65
  run.font.name = '新細明體'
66
  run.font.size = Pt(16)
67
  elif content_type == 'paragraph':
68
- lines = [line.strip() for line in text.splitlines() if line.strip()]
 
 
 
 
 
69
  clean_text = ' '.join(lines)
70
  para = doc.add_paragraph(clean_text)
71
  para.paragraph_format.space_before = Cm(0)
@@ -118,38 +133,6 @@ def create_interface():
118
  interactive=False
119
  )
120
 
121
- gr.HTML("""<div style="margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
122
- <h3>🛠️ 處理功能說明</h3>
123
- <ul>
124
- <li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
125
- <li><strong>刪除多餘換行:</strong>自動合併連續的空行,避免過多空白</li>
126
- <li><strong>首行縮排:</strong>每個段落首行縮排 2 個字元位置</li>
127
- <li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
128
- <li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式(無縮排)</li>
129
- <li><strong>分頁設定:</strong>每個章節前自動分頁</li>
130
- </ul>
131
- </div>""")
132
-
133
- gr.HTML("""<div style="margin-top: 20px; padding: 20px; background-color: #e8f4f8; border-radius: 10px;">
134
- <h3>💡 使用範例</h3>
135
- <p><strong>章節關鍵字設定:</strong></p>
136
- <ul>
137
- <li>小說:<code>章,節</code></li>
138
- <li>漫畫:<code>話,回</code></li>
139
- <li>論文:<code>章,節,段</code></li>
140
- <li>劇本:<code>幕,場</code></li>
141
- </ul>
142
- <p><strong>支援的章節格式:</strong></p>
143
- <ul>
144
- <li>第一章、第二章、第三章...</li>
145
- <li>第1話、第2話、第3話...</li>
146
- <li>第一節、第二節、第三節...</li>
147
- <li>第一幕、第二幕、第三幕...</li>
148
- <li>第I章、第II章、第III章...</li>
149
- <li>第二十三章、第三十四節...</li>
150
- </ul>
151
- </div>""")
152
-
153
  process_btn.click(
154
  fn=format_docx,
155
  inputs=[file_input, chapter_input],
 
2
  import re
3
  from docx import Document
4
  from docx.shared import Cm, Pt
5
+ from docx.enum.style import WD_STYLE_TYPE
6
+ from docx.oxml import OxmlElement
7
+ from docx.oxml.ns import qn
8
  import tempfile
9
  import os
10
 
11
def set_outline_level(paragraph, level: int = 0):
    """Set the outline level of *paragraph* via its ``w:outlineLvl`` property.

    The value is written verbatim into the ``w:val`` attribute of a
    ``w:outlineLvl`` element inside the paragraph's ``w:pPr``. In
    WordprocessingML this value is zero-based, so the default ``0`` is the
    top outline level.

    Args:
        paragraph: A python-docx paragraph object (anything exposing ``_p``
            with ``get_or_add_pPr()``).
        level: Outline level to store; converted with ``str()``.

    Calling this more than once on the same paragraph updates the existing
    element instead of appending a duplicate.
    """
    pPr = paragraph._p.get_or_add_pPr()

    # Reuse an existing element so repeated calls do not leave duplicates.
    outline = pPr.find(qn('w:outlineLvl'))
    if outline is None:
        outline = OxmlElement('w:outlineLvl')
        # The children of w:pPr are schema-ordered; w:outlineLvl must come
        # before these elements, so insert it ahead of the first one present
        # rather than blindly appending at the end.
        for successor_tag in ('w:divId', 'w:cnfStyle', 'w:rPr',
                              'w:sectPr', 'w:pPrChange'):
            successor = pPr.find(qn(successor_tag))
            if successor is not None:
                successor.addprevious(outline)
                break
        else:
            # No later-ordered sibling exists, so appending is schema-valid.
            pPr.append(outline)

    outline.set(qn('w:val'), str(level))
17
+
18
  def format_docx(file, chapter_keywords):
19
  """
20
  處理上傳的 Word 文件
 
29
  doc = Document(file.name)
30
 
31
  # 確保有 Heading 1 樣式
 
32
  styles = doc.styles
33
  if 'Heading 1' not in styles:
34
  heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
 
70
  heading.paragraph_format.line_spacing = 1.0
71
  heading.paragraph_format.left_indent = Cm(0)
72
  heading.paragraph_format.first_line_indent = Cm(0)
73
+ set_outline_level(heading, 0) # 強制指定為層級 1
74
  for run in heading.runs:
75
  run.font.name = '新細明體'
76
  run.font.size = Pt(16)
77
  elif content_type == 'paragraph':
78
+ # 處理多行段落:分行再合併為單一段落
79
+ lines = []
80
+ for line in text.splitlines():
81
+ stripped = line.strip()
82
+ if stripped:
83
+ lines.append(stripped)
84
  clean_text = ' '.join(lines)
85
  para = doc.add_paragraph(clean_text)
86
  para.paragraph_format.space_before = Cm(0)
 
133
  interactive=False
134
  )
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  process_btn.click(
137
  fn=format_docx,
138
  inputs=[file_input, chapter_input],