Nikhil Singh committed on
Commit
9de97c6
·
1 Parent(s): f6733de

new updates for spacy

Browse files
Files changed (3) hide show
  1. app.py +27 -11
  2. requirements.txt +2 -0
  3. spaces.yml +4 -0
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
2
  import re
3
  from mailparser import parse_from_string
4
  from bs4 import BeautifulSoup
 
 
 
5
 
6
  def accept_mail(email_content):
7
  email = parse_from_string(email_content)
@@ -15,39 +18,52 @@ def clean_email(email):
15
  return cleaned_text
16
 
17
  def remove_special_characters(text):
18
- pattern = r'[=_-]+'
19
-
20
  cleaned_text = re.sub(pattern, '', text)
21
  return cleaned_text
22
 
23
- def present(email_content):
 
 
 
 
 
 
 
 
24
  email = accept_mail(email_content)
25
  cleaned_text = clean_email(email)
26
  further_cleaned_text = remove_special_characters(cleaned_text)
 
 
 
27
  email_info = {
28
  "Subject": email.subject,
29
  "From": email.from_,
30
  "To": email.to,
31
  "Date": email.date,
32
- # "Message ID": email.message_id,
33
- # "Headers": str(email.headers),
34
- "Cleaned Body": further_cleaned_text
35
  }
36
  return [email_info[key] for key in email_info]
37
 
 
 
38
  demo = gr.Interface(
39
  fn=present,
40
- inputs="text",
 
 
 
41
  outputs=[
42
  gr.components.Textbox(label="Subject"),
43
  gr.components.Textbox(label="From"),
44
  gr.components.Textbox(label="To"),
45
  gr.components.Textbox(label="Date"),
46
- # gr.components.Textbox(label="Message ID"),
47
- # gr.components.Textbox(label="Headers"),
48
- gr.components.Textbox(label="Cleaned Body")
49
  ],
50
  title="Email Info",
51
- description="Enter the email content below to view its details."
52
  )
53
  demo.launch()
 
2
  import re
3
  from mailparser import parse_from_string
4
  from bs4 import BeautifulSoup
5
+ import spacy
6
+
7
+ nlp = spacy.load("en_core_web_sm")
8
 
9
  def accept_mail(email_content):
10
  email = parse_from_string(email_content)
 
18
  return cleaned_text
19
 
20
  def remove_special_characters(text):
21
+ pattern = r'[=_-]+'
 
22
  cleaned_text = re.sub(pattern, '', text)
23
  return cleaned_text
24
 
25
+ def extract_entities(text, labels):
26
+ doc = nlp(text)
27
+ entities = []
28
+ for ent in doc.ents:
29
+ if ent.label_ in labels:
30
+ entities.append((ent.text, ent.label_))
31
+ return entities
32
+
33
+ def present(email_content, labels):
34
  email = accept_mail(email_content)
35
  cleaned_text = clean_email(email)
36
  further_cleaned_text = remove_special_characters(cleaned_text)
37
+ entities = extract_entities(further_cleaned_text, labels)
38
+ entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
39
+
40
  email_info = {
41
  "Subject": email.subject,
42
  "From": email.from_,
43
  "To": email.to,
44
  "Date": email.date,
45
+ "Cleaned Body": further_cleaned_text,
46
+ "Extracted Entities": entity_info
 
47
  }
48
  return [email_info[key] for key in email_info]
49
 
50
+ labels = ["PERSON", "PRODUCT", "DEAL", "ORDER", "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY", "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
51
+
52
  demo = gr.Interface(
53
  fn=present,
54
+ inputs=[
55
+ gr.components.Textbox(label="Email Content"),
56
+ gr.components.CheckboxGroup(label="Labels to Detect", choices=labels, default=labels)
57
+ ],
58
  outputs=[
59
  gr.components.Textbox(label="Subject"),
60
  gr.components.Textbox(label="From"),
61
  gr.components.Textbox(label="To"),
62
  gr.components.Textbox(label="Date"),
63
+ gr.components.Textbox(label="Cleaned Body"),
64
+ gr.components.Textbox(label="Extracted Entities")
 
65
  ],
66
  title="Email Info",
67
+ description="Enter the email content below to view its details and detected entities."
68
  )
69
  demo.launch()
requirements.txt CHANGED
@@ -2,3 +2,5 @@ gliner
2
  mail-parser
3
  gradio
4
  beautifulsoup4
 
 
 
2
  mail-parser
3
  gradio
4
  beautifulsoup4
5
+ spacy>=3.0
6
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
spaces.yml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # spaces.yml
2
+ pip:
3
+ - spacy>=3.0
4
+ - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz