Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
import os
|
3 |
-
import fitz
|
4 |
from pptx import Presentation
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
import torch
|
@@ -31,7 +31,7 @@ os.makedirs(IMAGE_FOLDER, exist_ok=True)
|
|
31 |
# Extract Text from PDF
|
32 |
def extract_text_from_pdf(pdf_path):
|
33 |
try:
|
34 |
-
doc = fitz.open(pdf_path)
|
35 |
text = " ".join(page.get_text() for page in doc)
|
36 |
return text.strip() if text else None
|
37 |
except Exception as e:
|
@@ -53,7 +53,7 @@ def extract_text_from_pptx(pptx_path):
|
|
53 |
# Extract Images from PDF
|
54 |
def extract_images_from_pdf(pdf_path):
|
55 |
try:
|
56 |
-
doc = fitz.open(pdf_path)
|
57 |
images = []
|
58 |
for i, page in enumerate(doc):
|
59 |
for img_index, img in enumerate(page.get_images(full=True)):
|
|
|
1 |
from fastapi import FastAPI
|
2 |
import os
|
3 |
+
import pymupdf # PyMuPDF
|
4 |
from pptx import Presentation
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
import torch
|
|
|
31 |
# Extract Text from PDF
|
32 |
def extract_text_from_pdf(pdf_path):
|
33 |
try:
|
34 |
+
doc = pymupdf.open(pdf_path)
|
35 |
text = " ".join(page.get_text() for page in doc)
|
36 |
return text.strip() if text else None
|
37 |
except Exception as e:
|
|
|
53 |
# Extract Images from PDF
|
54 |
def extract_images_from_pdf(pdf_path):
|
55 |
try:
|
56 |
+
doc = pymupdf.open(pdf_path)
|
57 |
images = []
|
58 |
for i, page in enumerate(doc):
|
59 |
for img_index, img in enumerate(page.get_images(full=True)):
|