Spaces:
Running
on
Zero
Running
on
Zero
wjm55
committed on
Commit
·
52222e9
1
Parent(s):
0f72e2b
fixed the issue with entities and text output
Browse files
app.py
CHANGED
@@ -140,10 +140,17 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
|
|
140 |
if last_end < len(ocr_text):
|
141 |
highlighted_text.append((ocr_text[last_end:], None))
|
142 |
|
|
|
|
|
|
|
|
|
143 |
return highlighted_text
|
144 |
|
145 |
# If NER is disabled, return the text without highlighting
|
146 |
-
|
|
|
|
|
|
|
147 |
|
148 |
css = """
|
149 |
/* Overall app styling */
|
@@ -265,37 +272,23 @@ with gr.Blocks(css=css) as demo:
|
|
265 |
if isinstance(image, np.ndarray):
|
266 |
image = Image.fromarray(image)
|
267 |
|
268 |
-
# Create a temporary directory
|
269 |
with tempfile.TemporaryDirectory() as temp_dir:
|
270 |
# Save image
|
271 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
272 |
image.save(img_path)
|
273 |
|
274 |
-
#
|
275 |
-
|
276 |
-
entities = []
|
277 |
-
current_pos = 0
|
278 |
-
|
279 |
-
# Process the highlighted text data
|
280 |
-
for segment, label in text_data:
|
281 |
-
full_text += segment
|
282 |
-
if label: # If this segment has a label (is an entity)
|
283 |
-
entities.append({
|
284 |
-
"text": segment,
|
285 |
-
"label": label,
|
286 |
-
"start": current_pos,
|
287 |
-
"end": current_pos + len(segment)
|
288 |
-
})
|
289 |
-
current_pos += len(segment)
|
290 |
|
291 |
# Save text
|
292 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
293 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
294 |
-
f.write(
|
295 |
|
296 |
# Create JSON with text and entities
|
297 |
json_data = {
|
298 |
-
"text":
|
299 |
"entities": entities,
|
300 |
"image_file": f"{fname}.png"
|
301 |
}
|
|
|
140 |
if last_end < len(ocr_text):
|
141 |
highlighted_text.append((ocr_text[last_end:], None))
|
142 |
|
143 |
+
# Store the original text and entities as attributes of the highlighted_text list
|
144 |
+
highlighted_text.original_text = ocr_text
|
145 |
+
highlighted_text.entities = ner_results
|
146 |
+
|
147 |
return highlighted_text
|
148 |
|
149 |
# If NER is disabled, return the text without highlighting
|
150 |
+
result = [(ocr_text, None)]
|
151 |
+
result.original_text = ocr_text
|
152 |
+
result.entities = []
|
153 |
+
return result
|
154 |
|
155 |
css = """
|
156 |
/* Overall app styling */
|
|
|
272 |
if isinstance(image, np.ndarray):
|
273 |
image = Image.fromarray(image)
|
274 |
|
|
|
275 |
with tempfile.TemporaryDirectory() as temp_dir:
|
276 |
# Save image
|
277 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
278 |
image.save(img_path)
|
279 |
|
280 |
+
# Get the original OCR text and entities
|
281 |
+
original_text = getattr(text_data, 'original_text', '')
|
282 |
+
entities = getattr(text_data, 'entities', [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
# Save text
|
285 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
286 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
287 |
+
f.write(original_text)
|
288 |
|
289 |
# Create JSON with text and entities
|
290 |
json_data = {
|
291 |
+
"text": original_text,
|
292 |
"entities": entities,
|
293 |
"image_file": f"{fname}.png"
|
294 |
}
|