Spaces:
Sleeping
Sleeping
File size: 2,383 Bytes
8efe659 5777a9a 2fe2a42 13ac7c2 9de97c6 2fe2a42 3fd92e9 13ac7c2 5777a9a 9de97c6 5777a9a f9779a0 9de97c6 28ca0f2 1efe83d 5777a9a f9779a0 9de97c6 13ac7c2 56c79b1 9de97c6 f9779a0 56c79b1 1efe83d f9779a0 9de97c6 3fd92e9 9de97c6 3fd92e9 9de97c6 f9779a0 3fd92e9 9de97c6 3fd92e9 56c79b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
import re
from mailparser import parse_from_string
from bs4 import BeautifulSoup
import spacy
# Load the "en_core_web_sm" spaCy pipeline once at import time; get_sentences()
# calls this shared object instead of loading a pipeline per request.
nlp = spacy.load("en_core_web_sm")
def accept_mail(email_content):
    """Parse a raw email string with mail-parser.

    Args:
        email_content: The email source text as a single string.

    Returns:
        The message object produced by ``mailparser.parse_from_string``.
    """
    return parse_from_string(email_content)
def clean_email(email):
    """Return the email body as plain text with whitespace collapsed.

    Args:
        email: A parsed message exposing a ``body`` attribute, such as the
            object returned by ``accept_mail``.

    Returns:
        The body text with ``<style>`` and ``<link>`` elements removed and
        every run of whitespace reduced to a single space.
    """
    soup = BeautifulSoup(email.body, 'html.parser')
    # Remove stylesheet-related elements before extracting the text.
    for tag in soup.find_all(['style', 'link']):
        tag.decompose()
    # split() with no argument breaks on any whitespace run, so re-joining
    # with a single space normalises newlines, tabs and repeated spaces.
    words = soup.get_text(separator=' ').split()
    return ' '.join(words)
def remove_special_characters(text: str) -> str:
    """Delete every ``=``, ``_`` and ``-`` character from *text*.

    The characters are removed outright rather than replaced by a space, so
    the text on either side of a removed run is joined together.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* without any ``=``, ``_`` or ``-`` characters.
    """
    return re.sub(r'[=_-]+', '', text)
def get_sentences(further_cleaned_text):
    """Split text into sentences using the module-level spaCy pipeline.

    Args:
        further_cleaned_text: The text to segment.

    Returns:
        A list with the text of each sentence span, in document order.
    """
    doc = nlp(further_cleaned_text)
    return [sent.text for sent in doc.sents]
# doc = nlp(text)
# entities = []
# for ent in doc.ents:
# if ent.label_ in labels:
# entities.append((ent.text, ent.label_))
# return entities
def present(email_content, labels):
    """Parse an email and return the fields displayed by the interface.

    Args:
        email_content: Raw email text entered by the user.
        labels: Entity labels selected in the UI. Accepted so the signature
            matches the two interface inputs, but currently unused because
            entity extraction is disabled.

    Returns:
        A list ``[subject, from, to, date, cleaned_body]``, in that order.
    """
    email = accept_mail(email_content)
    further_cleaned_text = remove_special_characters(clean_email(email))
    # The sentence split that used to run here was never read, so it only
    # cost a full spaCy pass per request; it has been dropped.
    # TODO: when entity extraction is restored, add an "Extracted Entities"
    # entry below (and the matching output component).
    email_info = {
        "Subject": email.subject,
        "From": email.from_,
        "To": email.to,
        "Date": email.date,
        "Cleaned Body": further_cleaned_text,
    }
    # Dicts preserve insertion order, so the values line up with the keys above.
    return list(email_info.values())
# Entity labels offered in the "Labels to Detect" checkbox group.
labels = [
    "PERSON", "PRODUCT", "DEAL", "ORDER",
    "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY",
    "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE",
]

# Interface wiring: two inputs feed present(), whose five return values fill
# the five output textboxes in the same order.
demo = gr.Interface(
    fn=present,
    inputs=[
        gr.components.Textbox(label="Email Content"),
        # `value` sets the initial selection; the legacy `default` keyword is
        # not a parameter of gr.components classes, so the boxes were never
        # pre-checked.
        gr.components.CheckboxGroup(
            label="Labels to Detect", choices=labels, value=labels
        ),
    ],
    outputs=[
        gr.components.Textbox(label="Subject"),
        gr.components.Textbox(label="From"),
        gr.components.Textbox(label="To"),
        gr.components.Textbox(label="Date"),
        gr.components.Textbox(label="Cleaned Body"),
        # gr.components.Textbox(label="Extracted Entities")
    ],
    title="Email Info",
    description="Enter the email content below to view its details and detected entities."
)
demo.launch()
|