Spaces:

albertoarrigoni
/

email_robo_parser

Sleeping

albertoarrigoni committed on Sep 16, 2024

Commit

1c75fc2

verified ·

1 Parent(s): 7dacdae

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import mailparser
 from email_reply_parser import EmailReplyParser
 # Function to extract the latest email message from raw email content
 def extract_latest_message(raw_email):
@@ -8,8 +9,14 @@ def extract_latest_message(raw_email):
         # Parse the email using mail-parser
         mail = mailparser.parse_from_string(raw_email)
-        # Extract the plain text body, fallback to HTML body if plain text is not available
-        body = mail.text_plain[0] if mail.text_plain else mail.body
         # Use email-reply-parser to extract only the latest reply (remove quoted thread)
         latest_reply = EmailReplyParser.parse_reply(body)
@@ -41,4 +48,4 @@ def main():
             st.warning("Please paste the raw MIME email content.")
 if __name__ == "__main__":
-    main()

 import streamlit as st
 import mailparser
 from email_reply_parser import EmailReplyParser
+from bs4 import BeautifulSoup
 # Function to extract the latest email message from raw email content
 def extract_latest_message(raw_email):
         # Parse the email using mail-parser
         mail = mailparser.parse_from_string(raw_email)
+        # Extract the plain text body if available
+        if mail.text_plain:
+            body = mail.text_plain[0]
+        else:
+            # If plain text is not available, fallback to HTML body and convert to plain text
+            body = mail.body
+            # Use BeautifulSoup to strip HTML tags and get plain text
+            body = BeautifulSoup(body, "html.parser").get_text()
         # Use email-reply-parser to extract only the latest reply (remove quoted thread)
         latest_reply = EmailReplyParser.parse_reply(body)
             st.warning("Please paste the raw MIME email content.")
 if __name__ == "__main__":
+    main()