awacke1 committed on
Commit
99b2de2
·
verified ·
1 Parent(s): 2cbf123

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -20
app.py CHANGED
@@ -208,22 +208,31 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
208
  status.error(f"Failed to process PDF: {str(e)}")
209
  return []
210
 
211
async def process_ocr(image, output_file):
    """Run GOT-OCR2_0 on an image, save the text to ``output_file`` and return it.

    Args:
        image: Image object exposing ``save(path)`` (the callers pass PIL images).
        output_file: Path of the text file the OCR result is written to.

    Returns:
        The value returned by ``model.chat`` for the image.

    Progress and the elapsed time are shown in a Streamlit placeholder.
    """
    import tempfile

    start_time = time.time()
    status = st.empty()
    status.text("Processing GOT-OCR2_0... (0s)")
    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
    # Force CPU usage to avoid CUDA error until GPU setup is fixed
    model = AutoModel.from_pretrained(
        "ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32
    ).to("cpu").eval()
    # The model is handed the image as a file path, so it is written to a
    # scratch PNG first. mkstemp yields a unique name (a second-resolution
    # timestamp can collide when two calls land in the same second).
    fd, temp_file = tempfile.mkstemp(prefix="temp_", suffix=".png")
    os.close(fd)
    try:
        image.save(temp_file)
        result = model.chat(tokenizer, temp_file, ocr_type='ocr')
    finally:
        # Always clean up the scratch file, even when saving or OCR fails.
        os.remove(temp_file)
    elapsed = int(time.time() - start_time)
    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
    # Explicit UTF-8 so non-ASCII OCR text does not depend on the locale.
    async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
        await f.write(result)
    return result
 
 
 
 
 
 
 
 
 
227
 
228
  async def process_image_gen(prompt, output_file):
229
  start_time = time.time()
@@ -373,7 +382,7 @@ with tab_ocr:
373
  all_files = get_gallery_files()
374
  if all_files:
375
  if st.button("OCR All Assets 🚀"):
376
- full_text = "# OCR Results\n\n"
377
  for file in all_files:
378
  if file.endswith('.png'):
379
  image = Image.open(file)
@@ -383,7 +392,7 @@ with tab_ocr:
383
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
384
  doc.close()
385
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
386
- result = asyncio.run(process_ocr(image, output_file))
387
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
388
  entry = f"OCR Test: {file} -> {output_file}"
389
  st.session_state['history'].append(entry)
@@ -405,7 +414,7 @@ with tab_ocr:
405
  if st.button("Run OCR 🚀", key="ocr_run"):
406
  output_file = generate_filename("ocr_output", "txt")
407
  st.session_state['processing']['ocr'] = True
408
- result = asyncio.run(process_ocr(image, output_file))
409
  entry = f"OCR Test: {selected_file} -> {output_file}"
410
  st.session_state['history'].append(entry)
411
  st.text_area("OCR Result", result, height=200, key="ocr_result")
@@ -418,7 +427,7 @@ with tab_ocr:
418
  pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
419
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
420
  output_file = generate_filename(f"ocr_page_{i}", "txt")
421
- result = asyncio.run(process_ocr(image, output_file))
422
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
423
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
424
  st.session_state['history'].append(entry)
@@ -454,7 +463,7 @@ with tab_build:
454
  entry = f"Built {model_type} model: {model_name}"
455
  st.session_state['history'].append(entry)
456
  st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
457
- st.experimental_rerun()
458
 
459
  with tab_imggen:
460
  st.header("Test Image Gen 🎨")
@@ -644,7 +653,7 @@ def update_gallery():
644
  os.remove(file)
645
  st.session_state['asset_checkboxes'].pop(file, None)
646
  st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
647
- st.experimental_rerun()
648
 
649
  update_gallery()
650
 
 
208
  status.error(f"Failed to process PDF: {str(e)}")
209
  return []
210
 
211
async def process_gpt4o_ocr(image, output_file, max_tokens=300):
    """Extract text from an image with GPT-4o, save it to ``output_file`` and return it.

    Args:
        image: Image object exposing ``save(fp, format=...)`` (the callers pass
            PIL images).
        output_file: Path of the text file the extracted text is written to.
        max_tokens: Upper bound on the length of the model's reply. The
            default of 300 matches the previous hard-coded value; dense pages
            may need more to avoid a truncated result.

    Returns:
        The extracted text, or ``""`` when the request or the file write fails
        (the error is shown in the Streamlit status placeholder).
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GPT-4o OCR... (0s)")
    # The image is sent inline as a base64 PNG data URL.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract the electronic text from this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": "auto"}},
        ],
    }]
    try:
        response = client.chat.completions.create(model="gpt-4o", messages=messages, max_tokens=max_tokens)
        # The message content can be None (e.g. a refusal); normalise to an
        # empty string so the write below does not fail with a TypeError.
        result = response.choices[0].message.content or ""
        elapsed = int(time.time() - start_time)
        status.text(f"GPT-4o OCR completed in {elapsed}s!")
        # Explicit UTF-8 so non-ASCII text does not depend on the locale.
        async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
            await f.write(result)
        return result
    except Exception as e:
        # Top-level UI boundary: report the failure and return an empty
        # result so the calling page keeps rendering.
        status.error(f"Failed to process image with GPT-4o: {str(e)}")
        return ""
236
 
237
  async def process_image_gen(prompt, output_file):
238
  start_time = time.time()
 
382
  all_files = get_gallery_files()
383
  if all_files:
384
  if st.button("OCR All Assets 🚀"):
385
+ full_text = "# OCR Results (GPT-4o)\n\n"
386
  for file in all_files:
387
  if file.endswith('.png'):
388
  image = Image.open(file)
 
392
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
393
  doc.close()
394
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
395
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
396
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
397
  entry = f"OCR Test: {file} -> {output_file}"
398
  st.session_state['history'].append(entry)
 
414
  if st.button("Run OCR 🚀", key="ocr_run"):
415
  output_file = generate_filename("ocr_output", "txt")
416
  st.session_state['processing']['ocr'] = True
417
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
418
  entry = f"OCR Test: {selected_file} -> {output_file}"
419
  st.session_state['history'].append(entry)
420
  st.text_area("OCR Result", result, height=200, key="ocr_result")
 
427
  pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
428
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
429
  output_file = generate_filename(f"ocr_page_{i}", "txt")
430
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
431
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
432
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
433
  st.session_state['history'].append(entry)
 
463
  entry = f"Built {model_type} model: {model_name}"
464
  st.session_state['history'].append(entry)
465
  st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
466
+ st.rerun()
467
 
468
  with tab_imggen:
469
  st.header("Test Image Gen 🎨")
 
653
  os.remove(file)
654
  st.session_state['asset_checkboxes'].pop(file, None)
655
  st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
656
+ st.rerun()
657
 
658
  update_gallery()
659