Spaces:
Sleeping
Sleeping
Nikhil Singh
committed on
Commit
·
5777a9a
1
Parent(s):
3fd92e9
more cleaning
Browse files- app.py +13 -7
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from mailparser import parse_from_string
|
3 |
from bs4 import BeautifulSoup
|
4 |
|
@@ -13,18 +14,24 @@ def clean_email(email):
|
|
13 |
cleaned_text = ' '.join(soup.get_text(separator=' ').split())
|
14 |
return cleaned_text
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def present(email_content):
|
17 |
email = accept_mail(email_content)
|
18 |
cleaned_text = clean_email(email)
|
|
|
19 |
email_info = {
|
20 |
"Subject": email.subject,
|
21 |
"From": email.from_,
|
22 |
"To": email.to,
|
23 |
"Date": email.date,
|
24 |
-
"Message ID": email.message_id,
|
25 |
-
"Headers": str(email.headers),
|
26 |
-
"
|
27 |
-
"Cleaned Body": cleaned_text
|
28 |
}
|
29 |
return [email_info[key] for key in email_info]
|
30 |
|
@@ -36,9 +43,8 @@ demo = gr.Interface(
|
|
36 |
gr.components.Textbox(label="From"),
|
37 |
gr.components.Textbox(label="To"),
|
38 |
gr.components.Textbox(label="Date"),
|
39 |
-
gr.components.Textbox(label="Message ID"),
|
40 |
-
gr.components.Textbox(label="Headers"),
|
41 |
-
gr.components.Textbox(label="Attachments"),
|
42 |
gr.components.Textbox(label="Cleaned Body")
|
43 |
],
|
44 |
title="Email Info",
|
|
|
1 |
import gradio as gr
|
2 |
+
import re
|
3 |
from mailparser import parse_from_string
|
4 |
from bs4 import BeautifulSoup
|
5 |
|
|
|
14 |
cleaned_text = ' '.join(soup.get_text(separator=' ').split())
|
15 |
return cleaned_text
|
16 |
|
17 |
+
def remove_special_characters(text):
    """Strip runs of ``=``, ``_`` and ``-`` from *text* and tidy whitespace.

    Every run of one or more of the characters ``=``, ``_`` or ``-`` is
    deleted. Deleting a run that sat between two spaces (for example a
    ``=====`` separator line) would otherwise leave a double space, so the
    result is re-collapsed to single spaces and trimmed, using the same
    ``' '.join(... .split())`` idiom the body cleaner uses.

    Args:
        text: The string to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned string; ``""`` when *text* is empty or ``None``.
    """
    # Nothing to clean; also avoids a TypeError from re.sub on None.
    if not text:
        return ""

    pattern = r'[=_-]+'
    stripped = re.sub(pattern, '', text)

    # Removing a separator run can leave adjacent, leading or trailing
    # spaces behind; normalise them away.
    return ' '.join(stripped.split())
|
22 |
+
|
23 |
def present(email_content):
    """Build the list of display values for one submitted email.

    *email_content* is handed to ``accept_mail``; the resulting message
    object is run through ``clean_email`` and the text that comes back is
    further passed through ``remove_special_characters``.

    Returns:
        A list holding, in this order: subject, sender, recipients, date and
        the cleaned body text.
    """
    parsed = accept_mail(email_content)
    body_text = remove_special_characters(clean_email(parsed))

    # "Message ID" and "Headers" are deliberately left out of the result.
    fields = {
        "Subject": parsed.subject,
        "From": parsed.from_,
        "To": parsed.to,
        "Date": parsed.date,
        "Cleaned Body": body_text,
    }
    # Dicts preserve insertion order, so the values come out in the order
    # the keys are declared above.
    return list(fields.values())
|
37 |
|
|
|
43 |
gr.components.Textbox(label="From"),
|
44 |
gr.components.Textbox(label="To"),
|
45 |
gr.components.Textbox(label="Date"),
|
46 |
+
# gr.components.Textbox(label="Message ID"),
|
47 |
+
# gr.components.Textbox(label="Headers"),
|
|
|
48 |
gr.components.Textbox(label="Cleaned Body")
|
49 |
],
|
50 |
title="Email Info",
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ mail-parser
|
|
3 |
scipy==1.12
|
4 |
gradio
|
5 |
typing
|
6 |
-
bs4
|
|
|
|
3 |
scipy==1.12
|
4 |
gradio
|
5 |
typing
|
6 |
+
bs4
|
7 |
+
re
|