Update app.py
Browse files
app.py
CHANGED
@@ -208,22 +208,31 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
|
|
208 |
status.error(f"Failed to process PDF: {str(e)}")
|
209 |
return []
|
210 |
|
211 |
-
async def
|
212 |
start_time = time.time()
|
213 |
status = st.empty()
|
214 |
-
status.text("Processing
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
async def process_image_gen(prompt, output_file):
|
229 |
start_time = time.time()
|
@@ -373,7 +382,7 @@ with tab_ocr:
|
|
373 |
all_files = get_gallery_files()
|
374 |
if all_files:
|
375 |
if st.button("OCR All Assets 🚀"):
|
376 |
-
full_text = "# OCR Results\n\n"
|
377 |
for file in all_files:
|
378 |
if file.endswith('.png'):
|
379 |
image = Image.open(file)
|
@@ -383,7 +392,7 @@ with tab_ocr:
|
|
383 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
384 |
doc.close()
|
385 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
386 |
-
result = asyncio.run(
|
387 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
388 |
entry = f"OCR Test: {file} -> {output_file}"
|
389 |
st.session_state['history'].append(entry)
|
@@ -405,7 +414,7 @@ with tab_ocr:
|
|
405 |
if st.button("Run OCR 🚀", key="ocr_run"):
|
406 |
output_file = generate_filename("ocr_output", "txt")
|
407 |
st.session_state['processing']['ocr'] = True
|
408 |
-
result = asyncio.run(
|
409 |
entry = f"OCR Test: {selected_file} -> {output_file}"
|
410 |
st.session_state['history'].append(entry)
|
411 |
st.text_area("OCR Result", result, height=200, key="ocr_result")
|
@@ -418,7 +427,7 @@ with tab_ocr:
|
|
418 |
pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
419 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
420 |
output_file = generate_filename(f"ocr_page_{i}", "txt")
|
421 |
-
result = asyncio.run(
|
422 |
full_text += f"## Page {i + 1}\n\n{result}\n\n"
|
423 |
entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
|
424 |
st.session_state['history'].append(entry)
|
@@ -454,7 +463,7 @@ with tab_build:
|
|
454 |
entry = f"Built {model_type} model: {model_name}"
|
455 |
st.session_state['history'].append(entry)
|
456 |
st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
|
457 |
-
st.
|
458 |
|
459 |
with tab_imggen:
|
460 |
st.header("Test Image Gen 🎨")
|
@@ -644,7 +653,7 @@ def update_gallery():
|
|
644 |
os.remove(file)
|
645 |
st.session_state['asset_checkboxes'].pop(file, None)
|
646 |
st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
|
647 |
-
st.
|
648 |
|
649 |
update_gallery()
|
650 |
|
|
|
208 |
status.error(f"Failed to process PDF: {str(e)}")
|
209 |
return []
|
210 |
|
211 |
+
async def process_gpt4o_ocr(image, output_file):
    """Extract text from an image with GPT-4o and save it to ``output_file``.

    The image is PNG-encoded, embedded as a base64 data URL in a single user
    message, and sent through the module-level ``client``. Progress and
    failures are reported through a Streamlit placeholder.

    Args:
        image: Object exposing ``save(fp, format=...)``; callers in this file
            pass PIL images.
        output_file: Path of the text file that receives the extracted text.

    Returns:
        The extracted text, or ``""`` when any step fails (the error is shown
        in the status placeholder instead of being raised).
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GPT-4o OCR... (0s)")
    try:
        # Encode inside the try so an image that cannot be written as PNG is
        # reported through the same status.error path as API failures.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract the electronic text from this image."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{img_str}",
                        "detail": "auto",
                    },
                },
            ],
        }]
        # NOTE(review): max_tokens=300 caps the reply, so dense pages may come
        # back truncated -- confirm this limit is intentional.
        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, max_tokens=300
        )
        # The message content can be None (e.g. a refusal); normalise to ""
        # so the file write and the callers' string formatting stay valid.
        result = response.choices[0].message.content or ""
        # Explicit UTF-8: extracted text routinely contains non-ASCII
        # characters that a platform-default encoding may not represent.
        async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
            await f.write(result)
        # Report success only once the result is actually on disk.
        elapsed = int(time.time() - start_time)
        status.text(f"GPT-4o OCR completed in {elapsed}s!")
        return result
    except Exception as e:
        # Best-effort by design: surface the failure in the UI and let the
        # caller continue with an empty result.
        status.error(f"Failed to process image with GPT-4o: {str(e)}")
        return ""
|
236 |
|
237 |
async def process_image_gen(prompt, output_file):
|
238 |
start_time = time.time()
|
|
|
382 |
all_files = get_gallery_files()
|
383 |
if all_files:
|
384 |
if st.button("OCR All Assets 🚀"):
|
385 |
+
full_text = "# OCR Results (GPT-4o)\n\n"
|
386 |
for file in all_files:
|
387 |
if file.endswith('.png'):
|
388 |
image = Image.open(file)
|
|
|
392 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
393 |
doc.close()
|
394 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
395 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
396 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
397 |
entry = f"OCR Test: {file} -> {output_file}"
|
398 |
st.session_state['history'].append(entry)
|
|
|
414 |
if st.button("Run OCR 🚀", key="ocr_run"):
|
415 |
output_file = generate_filename("ocr_output", "txt")
|
416 |
st.session_state['processing']['ocr'] = True
|
417 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
418 |
entry = f"OCR Test: {selected_file} -> {output_file}"
|
419 |
st.session_state['history'].append(entry)
|
420 |
st.text_area("OCR Result", result, height=200, key="ocr_result")
|
|
|
427 |
pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
428 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
429 |
output_file = generate_filename(f"ocr_page_{i}", "txt")
|
430 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
431 |
full_text += f"## Page {i + 1}\n\n{result}\n\n"
|
432 |
entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
|
433 |
st.session_state['history'].append(entry)
|
|
|
463 |
entry = f"Built {model_type} model: {model_name}"
|
464 |
st.session_state['history'].append(entry)
|
465 |
st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
|
466 |
+
st.rerun()
|
467 |
|
468 |
with tab_imggen:
|
469 |
st.header("Test Image Gen 🎨")
|
|
|
653 |
os.remove(file)
|
654 |
st.session_state['asset_checkboxes'].pop(file, None)
|
655 |
st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
|
656 |
+
st.rerun()
|
657 |
|
658 |
update_gallery()
|
659 |
|