email-parser / app.py
Nikhil Singh
error fix
f9779a0
raw
history blame
2.38 kB
import gradio as gr
import re
from mailparser import parse_from_string
from bs4 import BeautifulSoup
import spacy
# Load the spaCy "en_core_web_sm" pipeline once at import time;
# get_sentences() reuses this shared object on every call.
nlp = spacy.load("en_core_web_sm")
def accept_mail(email_content):
    """Parse raw email text with mailparser.

    Args:
        email_content: The email source, as a string, handed directly to
            ``parse_from_string``.

    Returns:
        Whatever ``parse_from_string`` returns for that input.
    """
    return parse_from_string(email_content)
def clean_email(email):
    """Return the email body as plain text with whitespace collapsed.

    The body (``email.body``) is parsed as HTML, every ``<style>`` and
    ``<link>`` element is removed, and the remaining text is re-joined
    with single spaces between whitespace-separated tokens.

    Args:
        email: An object exposing the message body as ``email.body``.

    Returns:
        The cleaned body text as a single string.
    """
    markup = BeautifulSoup(email.body, 'html.parser')
    for element in markup.find_all(['style', 'link']):
        element.decompose()
    # split() with no argument drops all runs of whitespace, so joining
    # the pieces with one space normalises newlines, tabs and repeats.
    words = markup.get_text(separator=' ').split()
    return ' '.join(words)
# Matches any run of "=", "_" or "-" characters.
_SPECIAL_CHAR_RUNS = re.compile(r'[=_-]+')


def remove_special_characters(text):
    """Strip every ``=``, ``_`` and ``-`` character from *text*.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* with all runs of those characters removed
        (nothing is inserted in their place).
    """
    return _SPECIAL_CHAR_RUNS.sub('', text)
def get_sentences(further_cleaned_text):
    """Split text into sentences using the module-level spaCy pipeline.

    Args:
        further_cleaned_text: The text to segment.

    Returns:
        A list with the text of each sentence span, in document order.
    """
    sentences = []
    for span in nlp(further_cleaned_text).sents:
        sentences.append(span.text)
    return sentences
# doc = nlp(text)
# entities = []
# for ent in doc.ents:
# if ent.label_ in labels:
# entities.append((ent.text, ent.label_))
# return entities
def present(email_content, labels):
    """Parse an email and return the fields displayed by the interface.

    Args:
        email_content: Raw email text to parse.
        labels: Entity labels selected in the UI. Currently unused, since
            entity extraction is not implemented; the parameter is kept
            so the Gradio interface can continue to pass it.

    Returns:
        A list ``[subject, from, to, date, cleaned_body]``, in the same
        order as the interface's output components.
    """
    email = accept_mail(email_content)
    cleaned_body = remove_special_characters(clean_email(email))
    # Sentence splitting (get_sentences) is deliberately not run here:
    # nothing in the returned values uses it, and it would cost a full
    # spaCy pass over the body on every request.
    return [
        email.subject,
        email.from_,
        email.to,
        email.date,
        cleaned_body,
    ]
# Entity labels offered in the "Labels to Detect" checkbox group.
labels = ["PERSON", "PRODUCT", "DEAL", "ORDER",
          "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY",
          "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]

# Web UI: one textbox for the raw email plus the label selection go in;
# present() returns one value per output textbox, in the order below.
demo = gr.Interface(
    fn=present,
    inputs=[
        gr.components.Textbox(label="Email Content"),
        # The initial selection is passed as `value`; `default` is not a
        # keyword of gr.components.CheckboxGroup.
        gr.components.CheckboxGroup(label="Labels to Detect", choices=labels, value=labels),
    ],
    outputs=[
        gr.components.Textbox(label="Subject"),
        gr.components.Textbox(label="From"),
        gr.components.Textbox(label="To"),
        gr.components.Textbox(label="Date"),
        gr.components.Textbox(label="Cleaned Body"),
        # gr.components.Textbox(label="Extracted Entities")
    ],
    title="Email Info",
    description="Enter the email content below to view its details and detected entities."
)
demo.launch()