Nikhil Singh committed on
Commit
f9779a0
·
1 Parent(s): 9de97c6
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -22,20 +22,23 @@ def remove_special_characters(text):
22
  cleaned_text = re.sub(pattern, '', text)
23
  return cleaned_text
24
 
25
- def extract_entities(text, labels):
26
- doc = nlp(text)
27
- entities = []
28
- for ent in doc.ents:
29
- if ent.label_ in labels:
30
- entities.append((ent.text, ent.label_))
31
- return entities
 
 
 
32
 
33
  def present(email_content, labels):
34
  email = accept_mail(email_content)
35
  cleaned_text = clean_email(email)
36
  further_cleaned_text = remove_special_characters(cleaned_text)
37
- entities = extract_entities(further_cleaned_text, labels)
38
- entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
39
 
40
  email_info = {
41
  "Subject": email.subject,
@@ -43,11 +46,13 @@ def present(email_content, labels):
43
  "To": email.to,
44
  "Date": email.date,
45
  "Cleaned Body": further_cleaned_text,
46
- "Extracted Entities": entity_info
47
  }
48
  return [email_info[key] for key in email_info]
49
 
50
- labels = ["PERSON", "PRODUCT", "DEAL", "ORDER", "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY", "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
 
 
51
 
52
  demo = gr.Interface(
53
  fn=present,
@@ -61,7 +66,7 @@ demo = gr.Interface(
61
  gr.components.Textbox(label="To"),
62
  gr.components.Textbox(label="Date"),
63
  gr.components.Textbox(label="Cleaned Body"),
64
- gr.components.Textbox(label="Extracted Entities")
65
  ],
66
  title="Email Info",
67
  description="Enter the email content below to view its details and detected entities."
 
22
  cleaned_text = re.sub(pattern, '', text)
23
  return cleaned_text
24
 
25
+ def get_sentences(further_cleaned_text):
26
+ doc = nlp(further_cleaned_text)
27
+ sentences = [sent.text for sent in doc.sents]
28
+ return sentences
29
+ # doc = nlp(text)
30
+ # entities = []
31
+ # for ent in doc.ents:
32
+ # if ent.label_ in labels:
33
+ # entities.append((ent.text, ent.label_))
34
+ # return entities
35
 
36
  def present(email_content, labels):
37
  email = accept_mail(email_content)
38
  cleaned_text = clean_email(email)
39
  further_cleaned_text = remove_special_characters(cleaned_text)
40
+ sentence_list = get_sentences(further_cleaned_text)
41
+ # entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
42
 
43
  email_info = {
44
  "Subject": email.subject,
 
46
  "To": email.to,
47
  "Date": email.date,
48
  "Cleaned Body": further_cleaned_text,
49
+ # "Extracted Entities": entity_info
50
  }
51
  return [email_info[key] for key in email_info]
52
 
53
+ labels = ["PERSON", "PRODUCT", "DEAL", "ORDER",
54
+ "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY",
55
+ "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
56
 
57
  demo = gr.Interface(
58
  fn=present,
 
66
  gr.components.Textbox(label="To"),
67
  gr.components.Textbox(label="Date"),
68
  gr.components.Textbox(label="Cleaned Body"),
69
+ # gr.components.Textbox(label="Extracted Entities")
70
  ],
71
  title="Email Info",
72
  description="Enter the email content below to view its details and detected entities."