Redmind committed on
Commit
46020d8
·
verified ·
1 Parent(s): 77175c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI
2
  import os
3
- import fitz # PyMuPDF
4
  from pptx import Presentation
5
  from sentence_transformers import SentenceTransformer
6
  import torch
@@ -31,7 +31,7 @@ os.makedirs(IMAGE_FOLDER, exist_ok=True)
31
  # Extract Text from PDF
32
  def extract_text_from_pdf(pdf_path):
33
  try:
34
- doc = fitz.open(pdf_path)
35
  text = " ".join(page.get_text() for page in doc)
36
  return text.strip() if text else None
37
  except Exception as e:
@@ -53,7 +53,7 @@ def extract_text_from_pptx(pptx_path):
53
  # Extract Images from PDF
54
  def extract_images_from_pdf(pdf_path):
55
  try:
56
- doc = fitz.open(pdf_path)
57
  images = []
58
  for i, page in enumerate(doc):
59
  for img_index, img in enumerate(page.get_images(full=True)):
 
1
  from fastapi import FastAPI
2
  import os
3
+ import pymupdf # PyMuPDF
4
  from pptx import Presentation
5
  from sentence_transformers import SentenceTransformer
6
  import torch
 
31
  # Extract Text from PDF
32
  def extract_text_from_pdf(pdf_path):
33
  try:
34
+ doc = pymupdf.open(pdf_path)
35
  text = " ".join(page.get_text() for page in doc)
36
  return text.strip() if text else None
37
  except Exception as e:
 
53
  # Extract Images from PDF
54
  def extract_images_from_pdf(pdf_path):
55
  try:
56
+ doc = pymupdf.open(pdf_path)
57
  images = []
58
  for i, page in enumerate(doc):
59
  for img_index, img in enumerate(page.get_images(full=True)):