dseditor committed on
Commit
244abc7
·
verified ·
1 Parent(s): 98b9000

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -37,25 +37,27 @@ def format_docx(file, chapter_keywords):
37
  # 合併所有模式
38
  combined_pattern = '|'.join(patterns)
39
 
40
- # 先處理多餘的換行:標記要保留的段落
41
- paragraphs_to_keep = []
42
- previous_empty = False
43
 
44
- for paragraph in doc.paragraphs:
45
- current_empty = len(paragraph.text.strip()) == 0
46
-
47
- # 如果當前段落不是空的,或者是第一個空段落,則保留
48
- if not current_empty or not previous_empty:
49
- paragraphs_to_keep.append(paragraph)
50
 
51
- previous_empty = current_empty
 
 
 
 
 
 
 
52
 
53
- # 刪除不需要的段落(從後往前刪除避免索引問題)
54
- all_paragraphs = list(doc.paragraphs)
55
- for paragraph in reversed(all_paragraphs):
56
- if paragraph not in paragraphs_to_keep:
57
- p = paragraph._element
58
- p.getparent().remove(p)
59
 
60
  # 處理每個段落
61
  for paragraph in doc.paragraphs:
 
37
  # 合併所有模式
38
  combined_pattern = '|'.join(patterns)
39
 
40
+ # 先標記空段落,準備刪除多餘的換行
41
+ paragraphs_list = list(doc.paragraphs)
42
+ to_delete = []
43
 
44
+ for i in range(len(paragraphs_list)):
45
+ current_para = paragraphs_list[i]
46
+ current_empty = len(current_para.text.strip()) == 0
 
 
 
47
 
48
+ # 如果是空段落,檢查前一個段落
49
+ if current_empty and i > 0:
50
+ prev_para = paragraphs_list[i-1]
51
+ prev_empty = len(prev_para.text.strip()) == 0
52
+
53
+ # 如果前一個段落也是空的,標記當前段落刪除
54
+ if prev_empty:
55
+ to_delete.append(current_para)
56
 
57
+ # 刪除多餘的空段落(從後往前刪除)
58
+ for para in reversed(to_delete):
59
+ p = para._element
60
+ p.getparent().remove(p)
 
 
61
 
62
  # 處理每個段落
63
  for paragraph in doc.paragraphs: