albertoarrigoni committed on
Commit
1c75fc2
·
verified ·
1 Parent(s): 7dacdae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import mailparser
3
  from email_reply_parser import EmailReplyParser
 
4
 
5
  # Function to extract the latest email message from raw email content
6
  def extract_latest_message(raw_email):
@@ -8,8 +9,14 @@ def extract_latest_message(raw_email):
8
  # Parse the email using mail-parser
9
  mail = mailparser.parse_from_string(raw_email)
10
 
11
- # Extract the plain text body, fallback to HTML body if plain text is not available
12
- body = mail.text_plain[0] if mail.text_plain else mail.body
 
 
 
 
 
 
13
 
14
  # Use email-reply-parser to extract only the latest reply (remove quoted thread)
15
  latest_reply = EmailReplyParser.parse_reply(body)
@@ -41,4 +48,4 @@ def main():
41
  st.warning("Please paste the raw MIME email content.")
42
 
43
  if __name__ == "__main__":
44
- main()
 
1
  import streamlit as st
2
  import mailparser
3
  from email_reply_parser import EmailReplyParser
4
+ from bs4 import BeautifulSoup
5
 
6
  # Function to extract the latest email message from raw email content
7
  def extract_latest_message(raw_email):
 
9
  # Parse the email using mail-parser
10
  mail = mailparser.parse_from_string(raw_email)
11
 
12
+ # Extract the plain text body if available
13
+ if mail.text_plain:
14
+ body = mail.text_plain[0]
15
+ else:
16
+ # If plain text is not available, fallback to HTML body and convert to plain text
17
+ body = mail.body
18
+ # Use BeautifulSoup to strip HTML tags and get plain text
19
+ body = BeautifulSoup(body, "html.parser").get_text()
20
 
21
  # Use email-reply-parser to extract only the latest reply (remove quoted thread)
22
  latest_reply = EmailReplyParser.parse_reply(body)
 
48
  st.warning("Please paste the raw MIME email content.")
49
 
50
  if __name__ == "__main__":
51
+ main()