Curative committed on
Commit 3241a33 · verified · 1 Parent(s): 0e8551f

Update app.py

Files changed (1)
  1. app.py +25 -114
app.py CHANGED
@@ -1,133 +1,44 @@
  import gradio as gr
- from transformers import pipeline, AutoTokenizer
- import torch
 
- # —— Lazy‑loaded pipelines & tokenizers —— #
- summarizer = sentiment = ner = classifier = None
- ner_tokenizer = None
 
- def get_summarizer():
-     global summarizer
-     if summarizer is None:
-         summarizer = pipeline(
-             "summarization",
-             model="Curative/t5-summarizer-cnn",
-             framework="pt"
-         )
-     return summarizer
-
- def get_sentiment():
-     global sentiment
-     if sentiment is None:
-         sentiment = pipeline(
-             "sentiment-analysis",
-             model="distilbert-base-uncased-finetuned-sst-2-english",
-             framework="pt"
-         )
-     return sentiment
-
- def get_classifier():
-     global classifier
-     if classifier is None:
-         classifier = pipeline(
-             "zero-shot-classification",
-             model="facebook/bart-large-mnli",
-             framework="pt"
-         )
-     return classifier
-
- def get_ner():
-     global ner, ner_tokenizer
-     if ner is None:
-         # Load Fast tokenizer explicitly for proper aggregation
-         ner_tokenizer = AutoTokenizer.from_pretrained(
-             "elastic/distilbert-base-uncased-finetuned-conll03-english",
-             use_fast=True
-         )
-         ner = pipeline(
-             "ner",
-             model="elastic/distilbert-base-uncased-finetuned-conll03-english",
-             tokenizer=ner_tokenizer,
-             aggregation_strategy="simple",
-             framework="pt"
-         )
-     return ner
-
- # —— Helper functions —— #
- def chunk_and_summarize(text: str) -> str:
-     """Split on sentences into ≤1,000 char chunks, summarize each, then join."""
-     summarizer = get_summarizer()
-     max_chunk = 1000
-     sentences = text.split(". ")
-     chunks, current = [], ""
-     for sent in sentences:
-         # +2 accounts for the period and space
-         if len(current) + len(sent) + 2 <= max_chunk:
-             current += sent + ". "
-         else:
-             chunks.append(current.strip())
-             current = sent + ". "
-     if current:
-         chunks.append(current.strip())
-
-     summaries = []
-     for chunk in chunks:
-         part = summarizer(
-             chunk,
-             max_length=150,
-             min_length=40,
-             do_sample=False
-         )[0]["summary_text"]
-         summaries.append(part)
-     return " ".join(summaries)
-
- def merge_entities(ents):
-     """Merge sub‑word tokens (##…) into full words."""
-     merged = []
-     for e in ents:
-         w, t = e["word"], e["entity_group"]
-         if w.startswith("##") and merged:
-             merged[-1]["word"] += w.replace("##", "")
-         else:
-             merged.append({"word": w, "type": t})
-     return merged
 
  def process(text, features):
-     out = {}
      if "Summarization" in features:
-         out["summary"] = chunk_and_summarize(text)
      if "Sentiment" in features:
-         s = get_sentiment()(text)[0]
-         out["sentiment"] = {"label": s["label"], "score": s["score"]}
      if "Classification" in features:
-         labels = ["technology","sports","business","politics",
-                   "health","science","travel","entertainment"]
-         cls = get_classifier()(text, candidate_labels=labels)
-         # Zip & sort
-         pairs = sorted(
-             zip(cls["labels"], cls["scores"]),
-             key=lambda x: x[1],
-             reverse=True
-         )
-         out["classification"] = [
-             {"label": lbl, "score": scr} for lbl, scr in pairs
-         ]
      if "Entities" in features:
-         ents = get_ner()(text)
-         out["entities"] = merge_entities(ents)
-     return out
 
- # —— Gradio UI —— #
  with gr.Blocks() as demo:
-     gr.Markdown("## 🛠️ MultiFeature NLP Service")
-     inp = gr.Textbox(lines=8, placeholder="Enter your text here…")
      feats = gr.CheckboxGroup(
-         ["Summarization","Sentiment","Classification","Entities"],
          label="Select features to run"
      )
      btn = gr.Button("Run")
      out = gr.JSON(label="Results")
-
      btn.click(process, [inp, feats], out)
 
  demo.queue(api_open=True).launch()
 
  import gradio as gr
+ from transformers import pipeline
 
+ # Initialize pipelines
+ sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+ classification_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
+ summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
 
+ # Define candidate labels for classification
+ candidate_labels = [
+     "technology", "sports", "business", "politics",
+     "health", "science", "travel", "entertainment"
+ ]
 
  def process(text, features):
+     result = {}
      if "Summarization" in features:
+         summary = summarization_pipeline(text, max_length=150, min_length=40, do_sample=False)
+         result["summary"] = summary[0]["summary_text"]
      if "Sentiment" in features:
+         sentiment = sentiment_pipeline(text)[0]
+         result["sentiment"] = {"label": sentiment["label"], "score": sentiment["score"]}
      if "Classification" in features:
+         classification = classification_pipeline(text, candidate_labels=candidate_labels)
+         result["classification"] = dict(zip(classification["labels"], classification["scores"]))
      if "Entities" in features:
+         entities = ner_pipeline(text)
+         result["entities"] = [{"word": entity["word"], "type": entity["entity_group"]} for entity in entities]
+     return result
 
+ # Build Gradio interface
  with gr.Blocks() as demo:
+     gr.Markdown("## 🛠️ Multi-Feature NLP Service")
+     inp = gr.Textbox(lines=6, placeholder="Enter your text here…")
      feats = gr.CheckboxGroup(
+         ["Summarization", "Sentiment", "Classification", "Entities"],
          label="Select features to run"
      )
      btn = gr.Button("Run")
      out = gr.JSON(label="Results")
 
      btn.click(process, [inp, feats], out)
 
  demo.queue(api_open=True).launch()
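
A minimal usage sketch of the new process() helper, called directly rather than through the Gradio UI; the sample text, selected features, and commented output shape are illustrative only, and actual labels and scores depend on the model versions in use:

# Illustrative direct call to process() once the pipelines above have loaded
sample = "Apple opened a new office in Berlin, and employees welcomed the move."
result = process(sample, ["Sentiment", "Entities"])
print(result)
# Roughly expected shape (values will vary):
# {
#   "sentiment": {"label": "POSITIVE", "score": 0.99},
#   "entities": [{"word": "Apple", "type": "ORG"}, {"word": "Berlin", "type": "LOC"}]
# }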