dseditor committed on
Commit
d39278e
·
verified ·
1 Parent(s): 0b70552

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -15,6 +15,14 @@ def set_outline_level(paragraph, level: int = 0):
15
  outline.set(qn('w:val'), str(level))
16
  pPr.append(outline)
17
 
 
 
 
 
 
 
 
 
18
  def format_docx(file, chapter_keywords):
19
  """
20
  處理上傳的 Word 文件
@@ -70,18 +78,12 @@ def format_docx(file, chapter_keywords):
70
  heading.paragraph_format.line_spacing = 1.0
71
  heading.paragraph_format.left_indent = Cm(0)
72
  heading.paragraph_format.first_line_indent = Cm(0)
73
- set_outline_level(heading, 0) # 強制指定為層級 1
74
  for run in heading.runs:
75
  run.font.name = '新細明體'
76
  run.font.size = Pt(16)
77
  elif content_type == 'paragraph':
78
- # 處理多行段落:分行再合併為單一段落
79
- lines = []
80
- for line in text.splitlines():
81
- stripped = line.strip()
82
- if stripped:
83
- lines.append(stripped)
84
- clean_text = ' '.join(lines)
85
  para = doc.add_paragraph(clean_text)
86
  para.paragraph_format.space_before = Cm(0)
87
  para.paragraph_format.space_after = Cm(0)
 
15
  outline.set(qn('w:val'), str(level))
16
  pPr.append(outline)
17
 
18
+ def normalize_paragraph(text):
19
+ # 移除所有換行(包括
20
+ # \n, Word 的換行符)
21
+ text = re.sub(r'[\r\n]+', ' ', text)
22
+ # 壓縮多餘空白
23
+ text = re.sub(r'\s{2,}', ' ', text)
24
+ return text.strip()
25
+
26
  def format_docx(file, chapter_keywords):
27
  """
28
  處理上傳的 Word 文件
 
78
  heading.paragraph_format.line_spacing = 1.0
79
  heading.paragraph_format.left_indent = Cm(0)
80
  heading.paragraph_format.first_line_indent = Cm(0)
81
+ set_outline_level(heading, 0)
82
  for run in heading.runs:
83
  run.font.name = '新細明體'
84
  run.font.size = Pt(16)
85
  elif content_type == 'paragraph':
86
+ clean_text = normalize_paragraph(text)
 
 
 
 
 
 
87
  para = doc.add_paragraph(clean_text)
88
  para.paragraph_format.space_before = Cm(0)
89
  para.paragraph_format.space_after = Cm(0)