wjm55 committed on
Commit
3057e7a
·
1 Parent(s): 71e27e4
Files changed (1) hide show
  1. app.py +24 -8
app.py CHANGED
@@ -114,18 +114,34 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
114
  threshold=0.3
115
  )
116
 
117
- # Format entities for highlighting
118
- entities = []
119
- for entity in ner_results:
120
- entities.append((
121
- entity["start"],
122
- entity["end"],
 
 
 
 
 
 
 
 
 
 
123
  entity["label"]
124
  ))
 
 
 
 
 
125
 
126
- return [(ocr_text, entities)]
127
 
128
- return [(ocr_text, [])]
 
129
 
130
  css = """
131
  /* Overall app styling */
 
114
  threshold=0.3
115
  )
116
 
117
+ # Create a list of tuples (text, label) for highlighting
118
+ highlighted_text = []
119
+ last_end = 0
120
+
121
+ # Sort entities by start position
122
+ sorted_entities = sorted(ner_results, key=lambda x: x["start"])
123
+
124
+ # Process each entity and add non-entity text segments
125
+ for entity in sorted_entities:
126
+ # Add non-entity text before the current entity
127
+ if last_end < entity["start"]:
128
+ highlighted_text.append((ocr_text[last_end:entity["start"]], None))
129
+
130
+ # Add the entity text with its label
131
+ highlighted_text.append((
132
+ ocr_text[entity["start"]:entity["end"]],
133
  entity["label"]
134
  ))
135
+ last_end = entity["end"]
136
+
137
+ # Add any remaining text after the last entity
138
+ if last_end < len(ocr_text):
139
+ highlighted_text.append((ocr_text[last_end:], None))
140
 
141
+ return highlighted_text
142
 
143
+ # If NER is disabled, return the text without highlighting
144
+ return [(ocr_text, None)]
145
 
146
  css = """
147
  /* Overall app styling */