Upload app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,14 @@ def set_outline_level(paragraph, level: int = 0):
|
|
15 |
outline.set(qn('w:val'), str(level))
|
16 |
pPr.append(outline)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def format_docx(file, chapter_keywords):
|
19 |
"""
|
20 |
處理上傳的 Word 文件
|
@@ -70,18 +78,12 @@ def format_docx(file, chapter_keywords):
|
|
70 |
heading.paragraph_format.line_spacing = 1.0
|
71 |
heading.paragraph_format.left_indent = Cm(0)
|
72 |
heading.paragraph_format.first_line_indent = Cm(0)
|
73 |
-
set_outline_level(heading, 0)
|
74 |
for run in heading.runs:
|
75 |
run.font.name = '新細明體'
|
76 |
run.font.size = Pt(16)
|
77 |
elif content_type == 'paragraph':
|
78 |
-
|
79 |
-
lines = []
|
80 |
-
for line in text.splitlines():
|
81 |
-
stripped = line.strip()
|
82 |
-
if stripped:
|
83 |
-
lines.append(stripped)
|
84 |
-
clean_text = ' '.join(lines)
|
85 |
para = doc.add_paragraph(clean_text)
|
86 |
para.paragraph_format.space_before = Cm(0)
|
87 |
para.paragraph_format.space_after = Cm(0)
|
|
|
15 |
outline.set(qn('w:val'), str(level))
|
16 |
pPr.append(outline)
|
17 |
|
18 |
+
def normalize_paragraph(text):
    """Collapse a multi-line paragraph into one cleanly spaced line.

    Every run of line-break characters is replaced by a single space,
    runs of two or more whitespace characters are squeezed to one space,
    and leading/trailing whitespace is removed.

    Args:
        text: The raw paragraph text (``str``).

    Returns:
        The paragraph as a single line with no surrounding whitespace.
    """
    # Remove all line breaks. Besides \r and \n this covers every other
    # boundary that str.splitlines() recognises (vertical tab, form feed,
    # \x1c-\x1e, \x85, \u2028, \u2029), so a lone separator cannot slip
    # through. NOTE(review): the vertical tab is presumably the "Word
    # line-break character" the original comment referred to -- confirm.
    text = re.sub(r'[\r\n\v\f\x1c-\x1e\x85\u2028\u2029]+', ' ', text)
    # Squeeze runs of redundant whitespace down to a single space.
    text = re.sub(r'\s{2,}', ' ', text)
    return text.strip()
|
25 |
+
|
26 |
def format_docx(file, chapter_keywords):
|
27 |
"""
|
28 |
處理上傳的 Word 文件
|
|
|
78 |
heading.paragraph_format.line_spacing = 1.0
|
79 |
heading.paragraph_format.left_indent = Cm(0)
|
80 |
heading.paragraph_format.first_line_indent = Cm(0)
|
81 |
+
set_outline_level(heading, 0)
|
82 |
for run in heading.runs:
|
83 |
run.font.name = '新細明體'
|
84 |
run.font.size = Pt(16)
|
85 |
elif content_type == 'paragraph':
|
86 |
+
clean_text = normalize_paragraph(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
para = doc.add_paragraph(clean_text)
|
88 |
para.paragraph_format.space_before = Cm(0)
|
89 |
para.paragraph_format.space_after = Cm(0)
|