krishnapal2308 committed on
Commit
cbc9c5c
·
verified ·
1 Parent(s): 1fd0272

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +97 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from transformers import AutoTokenizer, TFAutoModelForTokenClassification
4
+
5
+
6
# Hub id of the token-classification checkpoint served by this app.
model_name = "krishnapal2308/NER-Task3"

# The tokenizer is loaded from the "bert-base-cased" checkpoint, while the
# model weights come from `model_name`.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = TFAutoModelForTokenClassification.from_pretrained(model_name)
9
+
10
+
11
# Maps a predicted class index to its tag string. The position of each tag in
# the tuple is its class index: "O" first, then a B-/I- pair per entity type.
id2label = dict(
    enumerate(
        (
            "O",
            "B-treatment",
            "I-treatment",
            "B-chronic_disease",
            "I-chronic_disease",
            "B-cancer",
            "I-cancer",
            "B-allergy_name",
            "I-allergy_name",
        )
    )
)
18
+
19
# Special tokens that are skipped when rebuilding words from the token list.
_SPECIAL_TOKENS = frozenset({"[CLS]", "[SEP]", "[PAD]"})


def _merge_wordpieces(tokens, labels):
    """Rebuild whole words from word pieces, keeping one entity type per word.

    Args:
        tokens: Token strings; pieces starting with "##" continue the
            previous word.
        labels: Tag strings ("O", "B-<type>", "I-<type>"), aligned with
            `tokens`.

    Returns:
        A list of `(word, entity_type_or_None)` tuples in text order. A word
        takes the tag of its first piece. An "I-" tag only counts when the
        previous word already carries the same entity type; otherwise the
        word is treated as unlabelled.
    """
    words = []
    current_word = ""
    current_label = None

    for token, label in zip(tokens, labels):
        if token in _SPECIAL_TOKENS:
            continue

        if token.startswith("##"):
            # Continuation piece: glue onto the current word without the marker.
            current_word += token[2:]
            continue

        if current_word:
            words.append((current_word, current_label))
        current_word = token

        # `current_label` still holds the previous word's type at this point,
        # which is what an "I-" tag must match to be accepted.
        entity = label[2:]
        if label.startswith("B-"):
            current_label = entity
        elif label.startswith("I-") and current_label == entity:
            current_label = entity
        else:
            current_label = None

    if current_word:
        words.append((current_word, current_label))
    return words


def _merge_adjacent_entities(word_labels):
    """Join runs of consecutive words that share the same entity type.

    Only directly neighbouring words are merged, so two mentions of the same
    type separated by other text stay separate and the text order is kept.
    Unlabelled words are never merged.
    """
    merged = []
    for word, label in word_labels:
        if label and merged and merged[-1][1] == label:
            previous_word = merged[-1][0]
            merged[-1] = (f"{previous_word} {word}", label)
        else:
            merged.append((word, label))
    return merged


def predict(text):
    """Tag `text` with the NER model and return labelled spans.

    Args:
        text: Input string. It is tokenized with `truncation=True`, so tokens
            beyond the tokenizer's length limit are dropped.

    Returns:
        A list of `(span_text, label)` tuples in text order, where `label` is
        an entity type string or `None` for unlabelled words. Consecutive
        words with the same entity type are joined with a single space, and
        the type "allergy_name" is reported as "allergy".
    """
    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
    outputs = model(inputs)

    # One class id per token of the single input sequence, converted to plain
    # Python ints in one call instead of one `.numpy()` call per token.
    label_ids = tf.argmax(outputs.logits, axis=-1)[0].numpy().tolist()

    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [id2label[label_id] for label_id in label_ids]

    words = _merge_wordpieces(tokens, labels)
    spans = _merge_adjacent_entities(words)

    return [
        (span, "allergy" if label == "allergy_name" else label)
        for span, label in spans
    ]
66
+
67
def ner_function(text):
    """Callback passed to the Gradio interface; returns `predict(text)` unchanged."""
    return predict(text)
70
+
71
# Sample inputs for the interface; each example is a one-element list holding
# the sentence for the single text input.
examples = [
    [sentence]
    for sentence in (
        "The patient was diagnosed with stage 2 breast cancer and treated with tamoxifen.",
        "He has a history of type 2 diabetes and is allergic to penicillin.",
    )
]
75
+
76
+
77
# Text shown under the title of the demo page.
app_description = """
This interface presents a Named Entity Recognition (NER) system specifically designed for analyzing clinical trial data.

Leveraging a fine-tuned BERT-based model, the system is capable of identifying and classifying key medical entities such as treatments, chronic diseases, cancers, and allergies.

Explore the provided examples to observe the model's capabilities in action.
"""

# Input and output widgets, created in the same order as they are wired in.
input_box = gr.Textbox(lines=5, label="Input Text")
entity_view = gr.HighlightedText(label="Text with Entities")

# Assemble the interface around `ner_function`.
iface = gr.Interface(
    fn=ner_function,
    inputs=input_box,
    outputs=entity_view,
    title="Clinical Trial Named Entity Recognition",
    description=app_description,
    examples=examples,
    cache_examples=True,
    allow_flagging="never",
    theme="default",
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ tensorflow
3
+ transformers
4
+ numpy