awacke1 committed on
Commit
4cf1d23
·
verified ·
1 Parent(s): 246df35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -68
app.py CHANGED
@@ -13,7 +13,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
13
  from diffusers import StableDiffusionPipeline
14
  from torch.utils.data import Dataset, DataLoader
15
  import csv
16
- from pdf2image import convert_from_path
17
  import requests
18
  from PIL import Image
19
  import cv2
@@ -342,6 +342,77 @@ def download_pdf(url, output_path):
342
  logger.error(f"Failed to download {url}: {e}")
343
  return False
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  # Mock Search Tool for RAG
346
  def mock_search(query: str) -> str:
347
  if "superhero" in query.lower():
@@ -423,73 +494,6 @@ def calculate_cargo_travel_time(origin_coords: Tuple[float, float], destination_
423
  flight_time = (actual_distance / cruising_speed_kmh) + 1.0
424
  return round(flight_time, 2)
425
 
426
- # Async Processing Functions
427
- async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
428
- start_time = time.time()
429
- status = st.empty()
430
- status.text(f"Processing PDF Snapshot ({mode})... (0s)")
431
- try:
432
- images = convert_from_path(pdf_path, dpi=200)
433
- output_files = []
434
- if mode == "thumbnail":
435
- img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
436
- output_file = generate_filename("thumbnail", "png")
437
- img.save(output_file)
438
- output_files.append(output_file)
439
- elif mode == "twopage":
440
- for i in range(min(2, len(images))):
441
- output_file = generate_filename(f"twopage_{i}", "png")
442
- images[i].save(output_file)
443
- output_files.append(output_file)
444
- elapsed = int(time.time() - start_time)
445
- status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
446
- update_gallery()
447
- return output_files
448
- except Exception as e:
449
- status.error(f"Failed to process PDF: {str(e)}. Install poppler-utils (e.g., 'sudo apt-get install poppler-utils' on Ubuntu) and ensure it's in PATH.")
450
- return []
451
-
452
- async def process_ocr(image, output_file):
453
- start_time = time.time()
454
- status = st.empty()
455
- status.text("Processing GOT-OCR2_0... (0s)")
456
- tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
457
- model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
458
- result = model.chat(tokenizer, image, ocr_type='ocr')
459
- elapsed = int(time.time() - start_time)
460
- status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
461
- async with aiofiles.open(output_file, "w") as f:
462
- await f.write(result)
463
- update_gallery()
464
- return result
465
-
466
- async def process_image_gen(prompt, output_file):
467
- start_time = time.time()
468
- status = st.empty()
469
- status.text("Processing Image Gen... (0s)")
470
- pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
471
- gen_image = pipeline(prompt, num_inference_steps=20).images[0]
472
- elapsed = int(time.time() - start_time)
473
- status.text(f"Image Gen completed in {elapsed}s!")
474
- gen_image.save(output_file)
475
- update_gallery()
476
- return gen_image
477
-
478
- async def process_custom_diffusion(images, output_file, model_name):
479
- start_time = time.time()
480
- status = st.empty()
481
- status.text(f"Training {model_name}... (0s)")
482
- unet = TinyUNet()
483
- diffusion = TinyDiffusion(unet)
484
- diffusion.train(images)
485
- gen_image = diffusion.generate()
486
- upscaled_image = diffusion.upscale(gen_image, scale_factor=2)
487
- elapsed = int(time.time() - start_time)
488
- status.text(f"{model_name} completed in {elapsed}s!")
489
- upscaled_image.save(output_file)
490
- update_gallery()
491
- return upscaled_image
492
-
493
  # Main App
494
  st.title("AI Vision & SFT Titans 🚀")
495
 
 
13
  from diffusers import StableDiffusionPipeline
14
  from torch.utils.data import Dataset, DataLoader
15
  import csv
16
+ import fitz # PyMuPDF, pure Python library
17
  import requests
18
  from PIL import Image
19
  import cv2
 
342
  logger.error(f"Failed to download {url}: {e}")
343
  return False
344
 
345
+ # Async Processing Functions
346
async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
    """Render PNG snapshots of a PDF with PyMuPDF and report progress in the UI.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.
        mode: ``"thumbnail"`` renders the first page at 50% scale;
            ``"twopage"`` renders up to the first two pages at full scale.
            Any other value renders nothing and still reports completion.

    Returns:
        The list of PNG file names written (as produced by
        ``generate_filename``), or an empty list when any step raised.

    Note:
        The body contains no ``await``; all work runs synchronously once the
        coroutine is awaited.
    """
    start_time = time.time()
    status = st.empty()
    status.text(f"Processing PDF Snapshot ({mode})... (0s)")
    try:
        output_files = []
        # The context manager closes the document even if rendering or saving
        # raises; the previous explicit close() was skipped on any exception.
        with fitz.open(pdf_path) as doc:
            if mode == "thumbnail":
                # doc[0] raises on an empty document; the handler below
                # reports that as a processing failure.
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))  # 50% scale
                output_file = generate_filename("thumbnail", "png")
                pix.save(output_file)
                output_files.append(output_file)
            elif mode == "twopage":
                page_count = min(2, len(doc))
                for i in range(page_count):
                    pix = doc[i].get_pixmap(matrix=fitz.Matrix(1.0, 1.0))  # Full scale
                    output_file = generate_filename(f"twopage_{i}", "png")
                    pix.save(output_file)
                    output_files.append(output_file)
        elapsed = int(time.time() - start_time)
        status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
        update_gallery()
        return output_files
    except Exception as e:
        # Best-effort UI boundary: surface the failure in the status slot and
        # hand the caller an empty result instead of propagating.
        status.error(f"Failed to process PDF: {str(e)}")
        return []
374
+
375
async def process_ocr(image, output_file):
    """Run GOT-OCR2_0 on ``image`` and write the recognised text to a file.

    The tokenizer and model are obtained through ``from_pretrained`` on every
    call and the model is moved to CPU in float32 eval mode.

    Args:
        image: Passed unchanged as the second argument to ``model.chat``.
        output_file: Path of the text file that receives the OCR result.

    Returns:
        The value returned by ``model.chat(tokenizer, image, ocr_type='ocr')``.
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GOT-OCR2_0... (0s)")
    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
    model = AutoModel.from_pretrained(
        "ucaslcl/GOT-OCR2_0",
        trust_remote_code=True,
        torch_dtype=torch.float32,
    ).to("cpu").eval()
    result = model.chat(tokenizer, image, ocr_type='ocr')
    elapsed = int(time.time() - start_time)
    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
    # OCR output can contain non-ASCII text; pin UTF-8 so the write does not
    # depend on the platform's default locale encoding.
    async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
        await f.write(result)
    update_gallery()
    return result
388
+
389
async def process_image_gen(prompt, output_file):
    """Generate one image from ``prompt`` with Stable Diffusion and save it.

    The ``OFA-Sys/small-stable-diffusion-v0`` pipeline is built through
    ``from_pretrained`` on each call, moved to CPU in float32, and run for
    20 inference steps.

    Args:
        prompt: Text prompt handed to the pipeline.
        output_file: Destination path passed to the image's ``save``.

    Returns:
        The first image of the pipeline result.
    """
    began = time.time()
    placeholder = st.empty()
    placeholder.text("Processing Image Gen... (0s)")

    sd_pipeline = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",
        torch_dtype=torch.float32,
    )
    sd_pipeline = sd_pipeline.to("cpu")
    generation = sd_pipeline(prompt, num_inference_steps=20)
    gen_image = generation.images[0]

    # Completion is reported before the image is written to disk.
    elapsed = int(time.time() - began)
    placeholder.text(f"Image Gen completed in {elapsed}s!")

    gen_image.save(output_file)
    update_gallery()
    return gen_image
400
+
401
async def process_custom_diffusion(images, output_file, model_name):
    """Train a TinyDiffusion model on ``images``, then generate and save a 2x upscaled sample.

    Args:
        images: Training input handed to ``TinyDiffusion.train``.
        output_file: Destination path passed to the upscaled image's ``save``.
        model_name: Label used only in the status messages.

    Returns:
        The upscaled image returned by ``TinyDiffusion.upscale``.
    """
    began = time.time()
    placeholder = st.empty()
    placeholder.text(f"Training {model_name}... (0s)")

    # A fresh UNet and diffusion wrapper are created for every call.
    trainer = TinyDiffusion(TinyUNet())
    trainer.train(images)
    sample = trainer.generate()
    upscaled_image = trainer.upscale(sample, scale_factor=2)

    # Completion is reported before the image is written to disk.
    elapsed = int(time.time() - began)
    placeholder.text(f"{model_name} completed in {elapsed}s!")

    upscaled_image.save(output_file)
    update_gallery()
    return upscaled_image
415
+
416
  # Mock Search Tool for RAG
417
  def mock_search(query: str) -> str:
418
  if "superhero" in query.lower():
 
494
  flight_time = (actual_distance / cruising_speed_kmh) + 1.0
495
  return round(flight_time, 2)
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  # Main App
498
  st.title("AI Vision & SFT Titans 🚀")
499