Nikhil Singh committed on
Commit
5777a9a
·
1 Parent(s): 3fd92e9

more cleaning

Browse files
Files changed (2) hide show
  1. app.py +13 -7
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from mailparser import parse_from_string
3
  from bs4 import BeautifulSoup
4
 
@@ -13,18 +14,24 @@ def clean_email(email):
13
  cleaned_text = ' '.join(soup.get_text(separator=' ').split())
14
  return cleaned_text
15
 
 
 
 
 
 
 
16
  def present(email_content):
17
  email = accept_mail(email_content)
18
  cleaned_text = clean_email(email)
 
19
  email_info = {
20
  "Subject": email.subject,
21
  "From": email.from_,
22
  "To": email.to,
23
  "Date": email.date,
24
- "Message ID": email.message_id,
25
- "Headers": str(email.headers), # Convert dictionary to string for display
26
- "Attachments": str(email.attachments), # Convert list to string for display
27
- "Cleaned Body": cleaned_text
28
  }
29
  return [email_info[key] for key in email_info]
30
 
@@ -36,9 +43,8 @@ demo = gr.Interface(
36
  gr.components.Textbox(label="From"),
37
  gr.components.Textbox(label="To"),
38
  gr.components.Textbox(label="Date"),
39
- gr.components.Textbox(label="Message ID"),
40
- gr.components.Textbox(label="Headers"),
41
- gr.components.Textbox(label="Attachments"),
42
  gr.components.Textbox(label="Cleaned Body")
43
  ],
44
  title="Email Info",
 
1
  import gradio as gr
2
+ import re
3
  from mailparser import parse_from_string
4
  from bs4 import BeautifulSoup
5
 
 
14
  cleaned_text = ' '.join(soup.get_text(separator=' ').split())
15
  return cleaned_text
16
 
17
+ def remove_special_characters(text):
18
+ pattern = r'[=_-]+'
19
+
20
+ cleaned_text = re.sub(pattern, '', text)
21
+ return cleaned_text
22
+
23
  def present(email_content):
24
  email = accept_mail(email_content)
25
  cleaned_text = clean_email(email)
26
+ further_cleaned_text = remove_special_characters(cleaned_text)
27
  email_info = {
28
  "Subject": email.subject,
29
  "From": email.from_,
30
  "To": email.to,
31
  "Date": email.date,
32
+ # "Message ID": email.message_id,
33
+ # "Headers": str(email.headers),
34
+ "Cleaned Body": further_cleaned_text
 
35
  }
36
  return [email_info[key] for key in email_info]
37
 
 
43
  gr.components.Textbox(label="From"),
44
  gr.components.Textbox(label="To"),
45
  gr.components.Textbox(label="Date"),
46
+ # gr.components.Textbox(label="Message ID"),
47
+ # gr.components.Textbox(label="Headers"),
 
48
  gr.components.Textbox(label="Cleaned Body")
49
  ],
50
  title="Email Info",
requirements.txt CHANGED
@@ -3,4 +3,5 @@ mail-parser
3
  scipy==1.12
4
  gradio
5
  typing
6
- bs4
 
 
3
  scipy==1.12
4
  gradio
5
  typing
6
+ bs4
7
+ re