Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,9 @@ PRIOR_MESSAGE_MARKERS = [
|
|
13 |
re.compile(r'^On .* wrote:', re.IGNORECASE),
|
14 |
re.compile(r'^----\s?Original Message\s?----$', re.IGNORECASE),
|
15 |
re.compile(r'^Begin forwarded message:', re.IGNORECASE),
|
|
|
|
|
|
|
16 |
|
17 |
# Portuguese patterns
|
18 |
re.compile(r'^Em .* escreveu:', re.IGNORECASE),
|
@@ -67,6 +70,11 @@ def remove_quoted_text(soup):
|
|
67 |
for hr in soup.find_all('hr'):
|
68 |
hr.decompose()
|
69 |
|
|
|
|
|
|
|
|
|
|
|
70 |
return soup
|
71 |
|
72 |
def extract_latest_message_from_lines(lines):
|
|
|
13 |
re.compile(r'^On .* wrote:', re.IGNORECASE),
|
14 |
re.compile(r'^----\s?Original Message\s?----$', re.IGNORECASE),
|
15 |
re.compile(r'^Begin forwarded message:', re.IGNORECASE),
|
16 |
+
|
17 |
+
# Custom separators in email
|
18 |
+
re.compile(r'^-+.*-+$'), # For lines like "----------------------------------------------------------------------------------------------------------------"
|
19 |
|
20 |
# Portuguese patterns
|
21 |
re.compile(r'^Em .* escreveu:', re.IGNORECASE),
|
|
|
70 |
for hr in soup.find_all('hr'):
|
71 |
hr.decompose()
|
72 |
|
73 |
+
# Remove tables with dotted borders (a typical marker of a previous conversation)
|
74 |
+
for table in soup.find_all('table'):
|
75 |
+
if 'border-top:1px dotted' in str(table):
|
76 |
+
table.decompose()
|
77 |
+
|
78 |
return soup
|
79 |
|
80 |
def extract_latest_message_from_lines(lines):
|