File size: 2,759 Bytes
d4087f2
 
 
1c75fc2
d4087f2
 
 
 
 
 
 
8763210
 
 
 
 
 
 
 
 
 
 
 
a6b8e87
1c75fc2
 
a6b8e87
8763210
a6b8e87
8763210
a6b8e87
 
1c75fc2
8763210
 
d4087f2
a6b8e87
 
 
 
d4087f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c75fc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import streamlit as st
import mailparser
from email_reply_parser import EmailReplyParser
from bs4 import BeautifulSoup

# Function to extract the latest email message from raw email content
def extract_latest_message(raw_email):
    """Return the newest reply contained in a raw MIME email.

    The message is parsed with mail-parser, a body is selected (plain text
    preferred, HTML converted to text as the fallback) and
    email-reply-parser strips the quoted thread from it. Intermediate
    results are echoed to the Streamlit page as debugging output.

    Args:
        raw_email: The complete raw email as a string.

    Returns:
        The text produced by ``EmailReplyParser.parse_reply`` for the
        selected body. If any step raises, the exception is not propagated;
        the string ``"Error: <exception message>"`` is returned instead.
    """
    try:
        # Parse the email using mail-parser
        mail = mailparser.parse_from_string(raw_email)

        # Debugging: display the entire mail object to inspect its content
        st.write("Parsed Email Object:")
        st.json(mail.mail_json)

        # Text parts of the email (plain text and HTML)
        text_parts = mail.text_plain
        html_parts = mail.text_html

        # Debugging: output all parts to check what's available
        st.write("Text Parts:", text_parts)
        st.write("HTML Parts:", html_parts)

        # Select the first part that actually carries text. Testing only the
        # list's truthiness and taking element 0 would let a blank text/plain
        # part hide a usable HTML body (or a later non-blank plain part).
        plain_body = next(
            (part for part in text_parts or [] if part and part.strip()),
            None,
        )
        html_body = next(
            (part for part in html_parts or [] if part and part.strip()),
            None,
        )

        if plain_body is not None:
            body = plain_body
            st.write("Extracted plain text body from email.")
        elif html_body is not None:
            # No usable plain text: fall back to the HTML body
            st.write("Extracted HTML body from email. Converting to plain text...")
            # Use BeautifulSoup to strip HTML tags and convert to plain text
            body = BeautifulSoup(html_body, "html.parser").get_text()
        else:
            body = "No body content found in email."

        # Debugging: output the cleaned-up body before using EmailReplyParser
        st.write("Cleaned-up email body before parsing:")
        st.text_area("Parsed Body", value=body, height=200)

        # Keep only the latest reply (the quoted thread is removed)
        return EmailReplyParser.parse_reply(body)
    except Exception as e:
        # Deliberate catch-all: the caller displays whatever string comes
        # back, so failures are reported as text rather than raised.
        return f"Error: {e}"

# Streamlit app
def main():
    """Render the Streamlit page and run the extraction on demand.

    Shows a title, a short usage note and a text area for the raw email.
    When the "Extract Latest Message" button is pressed, the pasted text is
    passed to ``extract_latest_message`` and the result is displayed; if the
    text area is empty or whitespace-only, a warning is shown instead.
    """
    st.title("Email Latest Message Extractor")

    st.write("""
    This tool extracts the latest message from a raw MIME email and removes any quoted thread or previous messages.
    Paste the raw email in MIME format in the text area below, and the tool will display the latest message.
    """)

    # Raw email pasted by the user
    pasted_email = st.text_area("Paste the raw MIME email content here", height=300)

    # Nothing more to do until the button is pressed
    if not st.button("Extract Latest Message"):
        return

    # Reject empty or whitespace-only input
    if not pasted_email.strip():
        st.warning("Please paste the raw MIME email content.")
        return

    # Run the extraction and show its result
    reply_text = extract_latest_message(pasted_email)
    st.subheader("Extracted Latest Message:")
    st.text_area("Latest Message", value=reply_text, height=200)

# Start the app only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()