# Spaces: Sleeping  (page banner captured along with this file; not part of the program)
import gradio as gr | |
import re | |
from mailparser import parse_from_string | |
from bs4 import BeautifulSoup | |
import spacy | |
# Name of the spaCy pipeline package loaded once at import time and shared by
# every request (used for sentence segmentation in get_sentences).
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)
def accept_mail(email_content):
    """Parse raw email text into a mail object.

    Args:
        email_content: The raw email, as a string, handed to
            ``mailparser.parse_from_string``.

    Returns:
        Whatever ``parse_from_string`` produces for that text; the rest of
        this file reads ``body``, ``subject``, ``from_``, ``to`` and ``date``
        from it.
    """
    return parse_from_string(email_content)
def clean_email(email):
    """Return the email body as plain text with whitespace collapsed.

    The body is parsed as HTML, every ``<style>`` and ``<link>`` element is
    removed, and the remaining text is joined with single spaces.

    Args:
        email: Parsed mail object exposing a ``body`` attribute.

    Returns:
        The visible body text as one line, words separated by one space.
    """
    markup = BeautifulSoup(email.body, 'html.parser')

    # Drop stylesheet-related elements so their contents do not end up in
    # the extracted text.
    unwanted_nodes = markup.find_all(['style', 'link'])
    for node in unwanted_nodes:
        node.decompose()

    raw_text = markup.get_text(separator=' ')
    # split() with no argument discards every run of whitespace, so joining
    # the pieces back normalises newlines, tabs and repeated spaces.
    words = raw_text.split()
    return ' '.join(words)
def remove_special_characters(text):
    """Strip every ``=``, ``_`` and ``-`` character from *text*.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* with all runs of ``=``, ``_`` and ``-`` deleted;
        every other character (including spaces) is left untouched.
    """
    separator_runs = re.compile(r'[=_-]+')
    return separator_runs.sub('', text)
def get_sentences(further_cleaned_text):
    """Split text into sentences using the module-level spaCy pipeline.

    Args:
        further_cleaned_text: The text to segment.

    Returns:
        A list with the text of each sentence span, in document order.
    """
    return [span.text for span in nlp(further_cleaned_text).sents]
# doc = nlp(text) | |
# entities = [] | |
# for ent in doc.ents: | |
# if ent.label_ in labels: | |
# entities.append((ent.text, ent.label_)) | |
# return entities | |
def present(email_content, labels):
    """Parse a raw email and return the fields displayed by the interface.

    Args:
        email_content: Raw email text to parse.
        labels: Entity labels selected in the UI. Accepted so the signature
            matches the interface inputs, but currently unused because
            entity extraction is not wired in.

    Returns:
        A list ``[subject, from, to, date, cleaned_body]``, in the same
        order as the interface's output components.
    """
    email = accept_mail(email_content)
    cleaned_body = remove_special_characters(clean_email(email))

    # Sentence segmentation is deliberately not run here: nothing consumes
    # its result, and it would cost a full spaCy pass on every request.
    # TODO: add the extracted entities as a sixth value once entity
    # extraction is implemented (and add the matching output component).
    return [
        email.subject,
        email.from_,
        email.to,
        email.date,
        cleaned_body,
    ]
# Entity labels offered in the "Labels to Detect" checkbox group.
labels = [
    "PERSON",
    "PRODUCT",
    "DEAL",
    "ORDER",
    "ORDER PAYMENT METHOD",
    "STORE",
    "LEGAL ENTITY",
    "MERCHANT",
    "FINANCIAL TRANSACTION",
    "UNCATEGORIZED",
    "DATE",
]
# Gradio UI: the raw email text and a label selector go in; one textbox per
# extracted field comes out, in the order of the list `present` returns.
demo = gr.Interface(
    fn=present,
    inputs=[
        gr.components.Textbox(label="Email Content"),
        # `value` (not the legacy `default` keyword) sets the initial
        # selection on gr.components classes, so every label starts checked.
        gr.components.CheckboxGroup(
            label="Labels to Detect",
            choices=labels,
            value=labels,
        ),
    ],
    outputs=[
        gr.components.Textbox(label="Subject"),
        gr.components.Textbox(label="From"),
        gr.components.Textbox(label="To"),
        gr.components.Textbox(label="Date"),
        gr.components.Textbox(label="Cleaned Body"),
        # gr.components.Textbox(label="Extracted Entities")
    ],
    title="Email Info",
    description="Enter the email content below to view its details and detected entities.",
)
demo.launch()