Nikhil Singh committed on
Commit
a6daf3c
·
1 Parent(s): e8878ab

multilingual added

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -35,16 +35,10 @@ def get_sentences(further_cleaned_text):
35
  doc = nlp(further_cleaned_text)
36
  sentences = [sent.text for sent in doc.sents]
37
  return sentences
38
- # doc = nlp(text)
39
- # entities = []
40
- # for ent in doc.ents:
41
- # if ent.label_ in labels:
42
- # entities.append((ent.text, ent.label_))
43
- # return entities
44
-
45
- def get_model(model_name: str = None):
46
  if model_name is None:
47
- model_name = "urchade/gliner_base"
48
 
49
  global _MODEL
50
 
@@ -53,11 +47,8 @@ def get_model(model_name: str = None):
53
 
54
  return _MODEL[model_name]
55
 
56
- def parse_query(sentences: List[str], labels: Union[str, list], threshold: float = 0.3, nested_ner: bool = False, model_name: str = None) -> List[Dict[str, Union[str, list]]]:
57
- model = get_model(model_name)
58
-
59
- if isinstance(labels, str):
60
- labels = [i.strip() for i in labels.split(",")]
61
 
62
  results = []
63
 
@@ -73,14 +64,13 @@ def parse_query(sentences: List[str], labels: Union[str, list], threshold: float
73
 
74
  return results
75
 
76
- def present(email_content, labels):
77
  email = accept_mail(email_content)
78
  cleaned_text = clean_email(email)
79
  further_cleaned_text = remove_special_characters(cleaned_text)
80
  sentence_list = get_sentences(further_cleaned_text)
81
- # entity_info = '\n'.join([f"{text}: {label}" for text, label in entities])
82
 
83
- result = parse_query(sentence_list, labels, threshold=0.3, nested_ner=False, model_name="urchade/gliner_base")
84
 
85
  email_info = {
86
  "Subject": email.subject,
@@ -92,20 +82,18 @@ def present(email_content, labels):
92
  }
93
  return [email_info[key] for key in email_info]
94
 
95
- labels = ["PERSON", "PRODUCT", "DEAL", "ORDER",
96
- "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY",
97
- "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
98
 
99
  demo = gr.Interface(
100
  fn=present,
101
  inputs=[
102
  gr.components.Textbox(label="Email Content"),
103
- gr.components.Textbox(
104
- value=", ".join(labels),
105
  label="Labels to Detect",
106
- placeholder="Enter the entities to detect here (comma separated)",
107
- scale=2
108
  ),
 
109
  ],
110
  outputs=[
111
  gr.components.Textbox(label="Subject"),
 
35
  doc = nlp(further_cleaned_text)
36
  sentences = [sent.text for sent in doc.sents]
37
  return sentences
38
+
39
+ def get_model(model_name: str = None, multilingual: bool = False):
 
 
 
 
 
 
40
  if model_name is None:
41
+ model_name = "urchade/gliner_base" if not multilingual else "urchade/gliner_multilingual"
42
 
43
  global _MODEL
44
 
 
47
 
48
  return _MODEL[model_name]
49
 
50
+ def parse_query(sentences: List[str], labels: List[str], threshold: float = 0.3, nested_ner: bool = False, model_name: str = None, multilingual: bool = False) -> List[Dict[str, Union[str, list]]]:
51
+ model = get_model(model_name, multilingual=multilingual)
 
 
 
52
 
53
  results = []
54
 
 
64
 
65
  return results
66
 
67
+ def present(email_content, labels, multilingual=False):
68
  email = accept_mail(email_content)
69
  cleaned_text = clean_email(email)
70
  further_cleaned_text = remove_special_characters(cleaned_text)
71
  sentence_list = get_sentences(further_cleaned_text)
 
72
 
73
+ result = parse_query(sentence_list, labels, threshold=0.3, nested_ner=False, model_name="urchade/gliner_base", multilingual=multilingual)
74
 
75
  email_info = {
76
  "Subject": email.subject,
 
82
  }
83
  return [email_info[key] for key in email_info]
84
 
85
+ labels = ["PERSON", "PRODUCT", "DEAL", "ORDER", "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY", "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
 
 
86
 
87
  demo = gr.Interface(
88
  fn=present,
89
  inputs=[
90
  gr.components.Textbox(label="Email Content"),
91
+ gr.components.CheckboxGroup(
92
+ choices=labels,
93
  label="Labels to Detect",
94
+ value=labels, # Default all selected
 
95
  ),
96
+ gr.components.Checkbox(label="Use Multilingual Model")
97
  ],
98
  outputs=[
99
  gr.components.Textbox(label="Subject"),