Spaces:

subpixel
/

mailsensei

Sleeping

App Files Files Community

subpixel committed on Nov 22, 2023

Commit

d5761e7

1 Parent(s): 6679c3b

Add project

Browse files

Files changed (6) hide show

.gitignore +9 -0
LLM.py +75 -0
README.md +125 -13
app.py +104 -0
email_utils.py +140 -0
requirements.txt +6 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+/myenv
+__pycache__
+cache
+venv
+.env
+.vscode
+llm.log
+.upm
+.pythonlibs

LLM.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import logging
+import timeit
+import json
+import os
+import torch
+import streamlit as st
+# This should stay above the import of transformers to have model downloaded in the same directory as the project
+os.environ['TRANSFORMERS_CACHE'] = os.curdir + '/cache'
+from transformers import pipeline
+# Configure the root logger: INFO and above is appended to llm.log with a
+# timestamp, logger name and level on every record.
+logging.basicConfig(
+    level=logging.INFO,
+    filename='llm.log',
+    filemode='a',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+@st.cache_resource
+def init():
+    """Build the three Hugging Face pipelines used by the app.
+
+    Returns:
+        A list in the fixed order ``[summarizer, detector, tagger]``;
+        callers index into it positionally.
+    """
+    # Only the summarizer is given an explicit device: GPU 0 when CUDA is
+    # available, otherwise -1 (CPU).
+    summarizer_device = 0 if torch.cuda.is_available() else -1
+    summarization_pipeline = pipeline(
+        "summarization",
+        model="sshleifer/distilbart-cnn-12-6",
+        use_fast=True,
+        device=summarizer_device,
+    )
+    spam_pipeline = pipeline(
+        "text-classification",
+        model="1aurent/distilbert-base-multilingual-cased-finetuned-email-spam",
+        use_fast=True,
+    )
+    tag_pipeline = pipeline(
+        "text2text-generation",
+        model="fabiochiu/t5-base-tag-generation",
+        use_fast=True,
+    )
+    return [summarization_pipeline, spam_pipeline, tag_pipeline]
+def summarize(prompt, summarizer):
+    """Summarize the first 2048 characters of ``prompt`` and log the timing.
+
+    Args:
+        prompt: Text to summarize.
+        summarizer: Callable invoked as ``summarizer(text, truncation=True)``.
+
+    Returns:
+        Whatever ``summarizer`` returns, unmodified.
+    """
+    started_at = timeit.default_timer()
+    result = summarizer(prompt[:2048], truncation=True)
+    elapsed = timeit.default_timer() - started_at
+    logging.info(f"Summary: {result}")
+    logging.info(f"Time taken to summarize: {elapsed}")
+    return result
+def detect_spam(prompt, detector):
+    """Classify the first 2048 characters of ``prompt`` with ``detector``.
+
+    Returns:
+        The ``'label'`` entry of the first result returned by ``detector``.
+    """
+    predictions = detector(prompt[:2048], truncation=True)
+    top_prediction = predictions[0]
+    return top_prediction['label']
+def get_tags(prompt, tagger):
+    """Run ``tagger`` on the first 2048 characters of ``prompt``.
+
+    Returns:
+        The tagger's output, unmodified.
+    """
+    clipped_prompt = prompt[:2048]
+    return tagger(clipped_prompt, truncation=True)
+# if __name__ == "__main__":
+#   llm = Summarizer()
+#   summary = llm.summarize("""
+# image.png
+# Job Chahiye!?!?
+# GDSC is here with another fantastic event
+# DSA Busted
+# This event will teach you about DATA STRUCTURES AND ALGORITHMS, as well as how to tackle coding rounds.
+# Every Saturday, we will have live doubt sessions.
+# Every Sunday, we will have a quiz.
+# CERTIFICATE and  Exciting GOODIES from GOOGLE.
+# So, don't pass up this excellent opportunity to begin or fast track your placement preparations.
+# """)
+#   print(summary)

README.md CHANGED Viewed

@@ -1,13 +1,125 @@
----
-title: Mailsensei
-emoji: 📊
-colorFrom: yellow
-colorTo: purple
-sdk: streamlit
-sdk_version: 1.28.2
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# 🚀 Introducing MailSensei
+## Your Cutting-Edge Email Management Companion 📧✂️
+<!-- <img  src="https://i.ibb.co/Bg6h8qp/your-image.png"  alt="MailSensei Image"> -->
+<!-- ![Image](./PeerlistHackathon_CoverImg.png) -->
+![Cover Image](https://raw.githubusercontent.com/Deven1902/Peerlist-Hackathon/main/PeerlistHackathon_CoverImg.png)
+Tired of sifting through endless emails, only to discover they're not worth your time? Ever missed important messages due to email overload? In today's digital age, our inboxes are overflowing with both vital and distracting content. You might spend precious time on lengthy, irrelevant emails. MailSensei is here to revolutionize your email experience by addressing these challenges. 🚀
+### Need for a Solution 🤔
+![Email Overload](https://media.giphy.com/media/OothRHNJSCaTS/giphy.gif)
+CEOs and business professionals often receive hundreds of emails daily, making efficient email management essential. MailSensei offers a solution to navigate this email overload, ensuring that crucial messages are never missed and time is spent more productively. 🕒💼
+### How to run the project 🌟
+- Visit the following URL to locate the project:- **[https://replit.com/@DevenNandapurka/Peerlist-Hackathon](https://replit.com/@DevenNandapurka/Peerlist-Hackathon)**
+- You will see the replit workspace. Click the `Run` button.
+- You need to set up an **App Password** before you can set credentials for MailSensei.
+Here we have attached an article to guide you through setting up an app password -> **[Generate your App Password](https://support.google.com/mail/answer/185833?hl=en)**
+- Once the project is up and running, enter your credentials in the project and click the `Set Credentials` button.
+- Click the `Fetch Emails` button -> and all of your unread emails will be fetched and rendered in a short summary format.
+### Our Unique Approach 🌟
+- **Gmail Credentials Setup:** Securely collect your "Gmail Address" and "App Password" via Streamlit to maintain the confidentiality of your email data. 🔒
+- **IMAP Connection:** Establish a connection to your Gmail account using IMAP (Internet Message Access Protocol) for seamless email access. 🌐
+- **Email Decoding:** Decode your emails to extract essential information, including "From," "Subject," and email content. 📤
+- **Email Summarization:** Utilize Large Language Models (LLMs), specifically "distilbart-cnn-12-6," to generate concise summaries of your emails, saving you valuable time. 📝⏳
+- **Tag Generation:** Intelligently categorize and organize emails using "t5-base-tag-generation." Tags are displayed alongside email details. 🏷️
+- **Streamlined Presentation:** Display email information in an ordered format, including "From," "Subject," and associated tags for easy reference. 📊
+- **Dropdown Summaries:** Provide a dropdown button for each email, allowing users to access the summary with a single click, enhancing readability. 📑
+- **Original Email Link:** Include a convenient link at the end of each summary that directs users to the original email for further context and action. 🔗
+### Technology Stack 🛠️
+- **Programming Languages:** Python
+- **Web Framework:** Streamlit
+- **Machine Learning Framework:** PyTorch
+- **Email Access:** IMAP (Internet Message Access Protocol)
+- **Text Processing:** Transformers library (from Hugging Face)
+- **Logging:** Python Logging Library
+### Models Used 🧠
+1. **Large Language Model (LLM) for Summarization:**
+   - **Model:** `sshleifer/distilbart-cnn-12-6` (DistilBART)
+   - **Purpose:** Summarizes email content to generate concise summaries.
+   - **Utilized for:** Email summarization.
+2. **Spam Detection Model (currently commented out as results are not accurate):**
+   - **Model:** `1aurent/distilbert-base-multilingual-cased-finetuned-email-spam` (DistilBERT)
+   - **Purpose:** Detects spam emails within the inbox.
+   - **Utilized for:** Spam filtering.
+3. **Tag Generation Model:**
+   - **Model:** `fabiochiu/t5-base-tag-generation` (T5)
+   - **Purpose:** Generates tags to categorize and organize emails.
+   - **Utilized for:** Email tagging and categorization.
+### Running locally:
+Requirements:
+- Python 3.7+
+- cuda (optional for GPU support)
+- Minimum 4GB RAM is required to run the app
+clone the repository:
+```bash
+git clone https://github.com/Deven1902/Peerlist-Hackathon
+```
+optionally create a virtual environment:
+```bash
+python -m venv venv
+```
+activate the virtual environment:
+for Linux
+```bash
+source venv/bin/activate
+```
+for Windows
+```bash
+venv\Scripts\activate
+```
+install requirements using pip:
+```bash
+pip install -r requirements.txt
+```
+run the app:
+```bash
+streamlit run app.py
+```
+### Future Enhancements 🚀🔮
+- **Improved Spam Filtering:** We'll enhance the accuracy of our spam detection algorithm for a cleaner inbox. 🚮
+- **Tag Collections & Sharing:** Users can create and share tag collections, streamlining collaboration and productivity. 🚀📊
+- **Custom Tag Generation:** Customize your own tag generation models for tailored email organization. 🏷️
+- **User Preferences:** Fine-tune MailSensei with preferences like summarization length and tag rules for a personalized experience. ⚙️👤
+*"Initially, we used OpenAI keys for all our app functions. However, we later changed our approach as some users may not have premium keys, and we wanted to ensure accessibility without additional costs. 😉😊"*

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import streamlit as st
+import email_utils
+import LLM
+# Keep the loaded pipelines and the credentials flag in session state so
+# they are created only when the keys are not already present.
+if "model" not in st.session_state:
+    st.session_state["model"] = LLM.init()
+model = st.session_state["model"]
+if "credentials_set" not in st.session_state:
+    st.session_state['credentials_set'] = False
+def start():
+    """Render the sidebar credential form and the main "Fetch Emails" control.
+
+    The sidebar collects the Gmail address and app password. Pressing
+    "Set Credentials" validates them and stores the unread email IDs in
+    ``st.session_state.email_ids``. ``credentials_set`` is only set to True
+    once those IDs are stored, so "Fetch Emails" is never enabled without
+    data to page through.
+    """
+    with st.sidebar:
+        st.markdown("# Email Summarizer")
+        st.subheader("Email Credentials")
+        from_email = st.text_input("Email Address")
+        from_password = st.text_input("App Password", type="password")
+        if st.button("Set Credentials"):
+            if not from_email or not from_password:
+                st.error("Please provide both email address and password.")
+            elif email_utils.set_credentials(from_email, from_password):
+                try:
+                    email_ids = email_utils.fetch_emails_from_imap(from_email, from_password)
+                except Exception as e:
+                    # UI boundary: report the failure instead of leaving the
+                    # app half-initialised with the flag set and no IDs.
+                    st.session_state['credentials_set'] = False
+                    st.error(f"Failed to fetch emails: {e}")
+                else:
+                    # Reset the page so a previous mailbox's page number is
+                    # not reused for the new ID list.
+                    st.session_state.update(email_ids=email_ids, page=1)
+                    st.session_state['credentials_set'] = True
+                    st.success(f"Credentials set successfully. Email: {from_email}")
+            else:
+                st.error("Failed to set credentials")
+    if st.session_state['credentials_set']:
+        if st.button("Fetch Emails"):
+            try:
+                with st.spinner('Loading...'):
+                    render_emails(from_email, from_password)
+            except Exception as e:
+                # Keep the console trace, but also tell the user something
+                # went wrong rather than rendering nothing.
+                print(e)
+                st.error(f"Could not load emails: {e}")
+    else:
+        st.button("Fetch Emails", disabled=True)
+        st.warning("Please set credentials first", icon="⚠️",)
+def render_emails(from_email, from_password, page_size=10):
+    """Render one page of summarized unread emails plus pagination buttons.
+
+    Reads the email IDs from ``st.session_state.email_ids`` and the current
+    page from ``st.session_state["page"]`` (default 1), decodes that page of
+    messages, and shows each one in an expander with its sender, subject,
+    generated tags, summary and a Gmail search link to the original message.
+
+    Args:
+      from_email: The Gmail address used to log in over IMAP.
+      from_password: The app password for that address.
+      page_size: The number of emails to display per page.
+    """
+    from urllib.parse import quote
+
+    email_ids = st.session_state.email_ids
+    total = len(email_ids)
+    # Ceiling division: number of pages, never less than one.
+    total_pages = max(1, -(-total // page_size))
+    # Clamp so a stale or out-of-range page number cannot index past the list.
+    page_number = min(max(st.session_state.get("page", 1), 1), total_pages)
+    start_index = (page_number - 1) * page_size
+    end_index = start_index + page_size
+    email_messages = email_utils.decode_emails(email_ids, start_index, end_index,
+                                               from_email, from_password)
+    if not email_messages:
+        st.info("No unread emails to show.")
+    for email_message in email_messages:
+        content = email_utils.strip_tags(email_message["content"])
+        summary = LLM.summarize(content, model[0])
+        tags = LLM.get_tags(content, model[2])
+        # spam = LLM.detect_spam(content, model[1])
+        # Percent-encode the Message-ID: it normally contains '<', '>' and
+        # '@', which would otherwise break the URL and the Markdown link.
+        message_id = quote(email_message["Message ID"].strip(), safe="")
+        redirect_url = f'https://mail.google.com/mail/u/0/#search/rfc822msgid%3A{message_id}'
+        EMAIL_FROM = email_message['from']
+        # The subject may be missing entirely, not just blank.
+        subject = email_message['subject'] or ""
+        EMAIL_SUBJECT = subject if subject.strip() else "No Subject"
+        EMAIL_TAGS = tags[0]['generated_text']
+        EMAIL_SUMMARY = summary[0]['summary_text']
+        with st.expander(
+            f"**From**:\n{EMAIL_FROM}\n\n**Subject**:\n{EMAIL_SUBJECT}\n\n**Tags**:\n{EMAIL_TAGS}\n\n"
+        ):
+            st.markdown(f"**Summary**:\n {EMAIL_SUMMARY}")
+            st.markdown(f"**[Read full e-mail]({redirect_url})**")
+
+    def _go_to(page):
+        # Store the new page, then render it with the same page size.
+        st.session_state.update(page=page)
+        render_emails(from_email, from_password, page_size)
+
+    # Navigation is bounded by the number of pages, not the number of emails.
+    if page_number > 1:
+        st.button('Previous page', on_click=_go_to, args=(page_number - 1,))
+    if page_number < total_pages:
+        st.button('Next page', on_click=_go_to, args=(page_number + 1,))
+# Build the UI only when this file is executed as the main script.
+if __name__ == "__main__":
+    start()

email_utils.py ADDED Viewed

	@@ -0,0 +1,140 @@

+from html.parser import HTMLParser
+from io import StringIO
+import email
+import imaplib
+def set_credentials(username, password):
+    """Check whether the given Gmail IMAP credentials can log in.
+
+    Opens a connection to imap.gmail.com, attempts a login, and always logs
+    out again so the validation does not leave a connection open.
+
+    Args:
+      username: The Gmail username.
+      password: The Gmail (app) password.
+
+    Returns:
+      True if the login succeeded, False if the connection or login failed.
+    """
+    imap_server = 'imap.gmail.com'
+    imap_port = 993
+    try:
+        # The context manager logs out when the block exits.
+        with imaplib.IMAP4_SSL(imap_server, imap_port) as imap_connection:
+            imap_connection.login(username, password)
+        return True
+    except (imaplib.IMAP4.error, OSError, ValueError):
+        # IMAP4.error: rejected login or protocol failure.
+        # OSError: network or TLS problems.
+        # ValueError: credentials that cannot be encoded for the wire.
+        return False
+def fetch_emails_from_imap(username, password):
+    """Return the IDs of unread Primary-category emails in the INBOX.
+
+    Only the IDs are fetched here; message bodies are downloaded later, one
+    page at a time.
+
+    Args:
+      username: The Gmail username.
+      password: The Gmail (app) password.
+
+    Returns:
+      A list of message ID strings in reverse search order (highest ID
+      first), or an empty list when nothing matches.
+    """
+    imap_server = 'imap.gmail.com'
+    imap_port = 993
+    # The context manager logs out even if login/select/search raises.
+    with imaplib.IMAP4_SSL(imap_server, imap_port) as imap_connection:
+        imap_connection.login(username, password)
+        # Read-only so that listing messages never marks them as seen.
+        imap_connection.select('INBOX', readonly=True)
+        status, data = imap_connection.search(
+            None, 'X-GM-RAW "Category:Primary"', "UNSEEN")
+        imap_connection.close()
+    if status != 'OK' or not data or data[0] is None:
+        return []
+    # split() with no argument yields [] for an empty result, whereas
+    # split(' ') would yield [''] and produce a bogus empty ID.
+    email_ids = data[0].decode().split()
+    email_ids.reverse()
+    return email_ids
+def _decode_subject(msg):
+    """Return the Subject header of ``msg`` as text ('' when absent)."""
+    from email.errors import HeaderParseError
+    from email.header import decode_header, make_header
+    raw_subject = msg['subject']
+    if raw_subject is None:
+        return ""
+    try:
+        # make_header joins every encoded-word chunk, not just the first.
+        return str(make_header(decode_header(raw_subject)))
+    except (HeaderParseError, LookupError, UnicodeError):
+        # Malformed encoding or unknown charset: fall back to the raw value.
+        return str(raw_subject)
+def _decode_part(part):
+    """Decode the payload of one message part using its declared charset."""
+    payload = part.get_payload(decode=True)
+    if payload is None:
+        return ""
+    charset = part.get_content_charset() or 'utf-8'
+    try:
+        return payload.decode(charset, errors='ignore')
+    except LookupError:
+        # Declared charset is unknown to Python; fall back to UTF-8.
+        return payload.decode('utf-8', errors='ignore')
+def _extract_body(msg):
+    """Return the first text/plain part, else the first text/html part."""
+    if not msg.is_multipart():
+        return _decode_part(msg)
+    html_fallback = None
+    for part in msg.walk():
+        content_type = part.get_content_type()
+        if content_type == "text/plain":
+            return _decode_part(part)
+        if content_type == "text/html" and html_fallback is None:
+            html_fallback = part
+    if html_fallback is None:
+        return ""
+    return _decode_part(html_fallback)
+def decode_emails(email_ids, start_index, end_index, username, password):
+    """Download and decode one slice of messages from the Gmail INBOX.
+
+    Args:
+      email_ids: Message IDs as returned by ``fetch_emails_from_imap``.
+      start_index: Index of the first ID in the slice to fetch.
+      end_index: Index one past the last ID in the slice to fetch.
+      username: The Gmail username.
+      password: The Gmail (app) password.
+
+    Returns:
+      A list of dicts, one per successfully fetched message, with the keys
+      'Message ID', 'from', 'subject', 'content', 'IsReply', 'InReplyTo'
+      and 'StoreReplyThread'.
+    """
+    # Drop empty IDs (an empty search result can yield one) before connecting.
+    page_ids = [email_id for email_id in email_ids[start_index:end_index] if email_id]
+    if not page_ids:
+        return []
+    imap_server = 'imap.gmail.com'
+    imap_port = 993
+    email_messages = []
+    # The context manager logs out even if a fetch or decode step raises.
+    with imaplib.IMAP4_SSL(imap_server, imap_port) as imap_connection:
+        imap_connection.login(username, password)
+        # Read-only so that reading a message does not mark it as seen.
+        imap_connection.select('INBOX', readonly=True)
+        for email_id in page_ids:
+            status, data = imap_connection.fetch(email_id, '(RFC822)')
+            # A message that vanished since the search returns no tuple.
+            if status != 'OK' or not data or not isinstance(data[0], tuple):
+                continue
+            msg = email.message_from_bytes(data[0][1])
+            in_reply_to = msg.get("In-Reply-To", "")
+            SingleEmail = {
+                'Message ID': msg.get("Message-ID", ""),
+                'from': msg['from'],
+                'subject': _decode_subject(msg),
+                'content': _extract_body(msg),
+                'IsReply': bool(in_reply_to),  # Check if it's a reply
+                'InReplyTo': in_reply_to,  # Add the ID of the parent message
+                'StoreReplyThread': [],
+            }
+            email_messages.append(SingleEmail)
+        imap_connection.close()
+    return email_messages
+class MLStripper(HTMLParser):
+    """HTML parser that collects only the text content of a document."""
+    def __init__(self):
+        # HTMLParser.__init__ already calls reset(); convert_charrefs=True
+        # makes the parser unescape entities such as &amp; in text.
+        super().__init__(convert_charrefs=True)
+        self.strict = False
+        self.text = StringIO()
+    def handle_data(self, d):
+        """Append one run of text found between tags."""
+        self.text.write(d)
+    def get_data(self):
+        """Return all text collected so far."""
+        return self.text.getvalue()
+def strip_tags(html):
+    """Return ``html`` with markup removed and character references unescaped.
+
+    Args:
+      html: The HTML (or plain text) string; ``None`` or '' yields ''.
+
+    Returns:
+      The text content as a single string.
+    """
+    if not html:
+        return ""
+    s = MLStripper()
+    s.feed(html)
+    # close() flushes text the parser is still buffering. Without it,
+    # trailing text with an unterminated '&' (e.g. "R&D") is dropped.
+    s.close()
+    return s.get_data()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+transformers
+streamlit
+--find-links https://download.pytorch.org/whl/torch_stable.html
+torch
+optimum
+auto-gptq