Sasidhar committed on
Commit
b27a82c
·
1 Parent(s): ea2d98b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py CHANGED
@@ -40,6 +40,39 @@ def init_ner_pipeline():
40
  pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple") # pass device=0 if using gpu
41
  return pipe
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Model initialization
45
  pipeline_summarization = init_text_summarization_model()
@@ -69,6 +102,8 @@ elif selected_menu == "Summarize Document":
69
  elif selected_menu == "Extract Entities":
70
  text = get_text_from_ocr_engine()
71
  output = pipeline_ner (text)
 
 
72
 
73
 
74
  elif selected_menu == "Get Answers":
 
40
  pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple") # pass device=0 if using gpu
41
  return pipe
42
 
43
+
44
def get_formatted_text_for_annotation(output, text=None):
    """Split ``text`` into plain and annotated segments for display.

    Args:
        output: Iterable of entity dicts, each providing ``'start'`` and
            ``'end'`` character offsets into ``text`` (``end`` exclusive,
            as used by the slice ``text[start:end]``) and an
            ``'entity_group'`` label.
        text: The string the offsets refer to. When omitted, the
            module-level ``text`` variable is used, which keeps the
            original one-argument call working.

    Returns:
        A tuple whose items are either plain ``str`` segments or
        ``(segment, entity_group, colour)`` triples. Concatenating the
        segment strings in order reproduces ``text`` whenever the
        entities do not overlap.
    """
    colour_map = {
        'Sex': '#5DD75D',
        'Duration': '#D92E45',
        'Sign_symptom': '#793F41',
        'Frequency': '#232AE7',
        'Detailed_description': '#E1D8D1',
        'History': '#296FB8',
        'Clinical_event': '#E840A7',
        'Lab_value': '#FE90C3',
        'Age': '#31404C',
        'Biological_structure': '#1A4B5B',
        'Diagnostic_procedure': '#804E7A',
    }
    # Fallback for entity groups missing from the map, so an unexpected
    # label is still rendered instead of raising KeyError.
    default_colour = '#8A8A8A'

    if text is None:
        # Backward compatibility: the original implementation read the
        # module-level ``text`` implicitly.
        text = globals()['text']

    annotated_texts = []
    cursor = 0  # index of the first character not yet emitted
    for entity in output:
        start, end = entity['start'], entity['end']
        group = entity['entity_group']

        # Emit the untouched text between the previous entity and this
        # one, without trimming the characters adjacent to the entity.
        if start > cursor:
            annotated_texts.append(text[cursor:start])

        annotated_texts.append(
            (text[start:end], group, colour_map.get(group, default_colour))
        )
        # max() keeps the cursor monotonic if entities overlap.
        cursor = max(cursor, end)

    # Emit whatever follows the last entity, including the final
    # character (and the whole text when there are no entities).
    if cursor < len(text):
        annotated_texts.append(text[cursor:])

    return tuple(annotated_texts)
76
 
77
  # Model initialization
78
  pipeline_summarization = init_text_summarization_model()
 
102
  elif selected_menu == "Extract Entities":
103
  text = get_text_from_ocr_engine()
104
  output = pipeline_ner (text)
105
+ entities_text =get_formatted_text_for_annotation(output)
106
+ annotated_text(entities_text)
107
 
108
 
109
  elif selected_menu == "Get Answers":