Spaces:
Sleeping
Sleeping
image from pdf
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ from PyPDF2 import PdfReader
|
|
10 |
from threading import Thread
|
11 |
from flask import Flask, request, jsonify
|
12 |
import io
|
|
|
13 |
|
14 |
# Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
|
15 |
# IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
|
@@ -64,17 +65,56 @@ processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
|
|
64 |
# return response
|
65 |
|
66 |
|
67 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
try:
|
69 |
# Download the image from the URL
|
70 |
# response = requests.get(image_url)
|
71 |
# response.raise_for_status() # Raise an error for invalid responses
|
72 |
# image = Image.open(io.BytesIO(response.content)).convert("RGB")
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
78 |
messages = [
|
79 |
{"role": "user", "content": [
|
80 |
{"type": "image"}, # Specify that an image is provided
|
@@ -200,6 +240,7 @@ PROMPT = (
|
|
200 |
"'Course Name:'\n"
|
201 |
"'Course Description:'\n"
|
202 |
"'Course Credits:'\n"
|
|
|
203 |
"'Delivery Method:'\n"
|
204 |
"'Prerequisite(s):'\n"
|
205 |
"'Co-requisite(s):'\n"
|
@@ -263,7 +304,8 @@ def extract_info():
|
|
263 |
if data["img_url"] is not None:
|
264 |
prompt_skills = f"{PROMPT_IMAGE}\n"
|
265 |
img_url = data["img_url"]
|
266 |
-
|
|
|
267 |
else:
|
268 |
response_image = ''
|
269 |
|
|
|
10 |
from threading import Thread
|
11 |
from flask import Flask, request, jsonify
|
12 |
import io
|
13 |
+
import fitz
|
14 |
|
15 |
# Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
|
16 |
# IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
|
|
|
65 |
# return response
|
66 |
|
67 |
|
68 |
+
def extract_image_from_pdf(pdf_url, dpi=75):
    """
    Download a PDF and render its first page as an in-memory image.

    Args:
        pdf_url (str): URL of the PDF to download.
        dpi (int): Resolution used to render the page. PDF user space is
            72 units per inch, so the page is scaled by ``dpi / 72``.

    Returns:
        PIL.Image: The first page rendered as an RGB image, or None if the
        download, parsing, or rendering failed for any reason.
    """
    try:
        # Download PDF (bounded wait so a stalled server cannot hang the caller)
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()

        # Open the PDF from bytes. The context manager guarantees the
        # document is closed even when page lookup or rendering raises.
        with fitz.open(stream=response.content, filetype="pdf") as pdf_document:
            # Get first page (raises for an empty document; handled below)
            first_page = pdf_document[0]

            # Render page to pixmap. alpha=False keeps the sample buffer at
            # exactly three bytes per pixel, matching the "RGB" mode below.
            zoom = dpi / 72
            pix = first_page.get_pixmap(
                matrix=fitz.Matrix(zoom, zoom),
                alpha=False,
            )

            # Convert to PIL Image; frombytes copies the pixel data, so the
            # image stays valid after the document is closed.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

        return img

    except Exception as e:
        # Best-effort helper: report the failure and signal it with None.
        print(f"Error extracting first page: {e}")
        return None
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
def predict_image(image_url, text, file_pref):
|
106 |
try:
|
107 |
# Download the image from the URL
|
108 |
# response = requests.get(image_url)
|
109 |
# response.raise_for_status() # Raise an error for invalid responses
|
110 |
# image = Image.open(io.BytesIO(response.content)).convert("RGB")
|
111 |
+
if file_pref == 'img':
|
112 |
+
response = requests.get(image_url)
|
113 |
+
response.raise_for_status() # Raise an error for invalid responses
|
114 |
+
image = Image.open(io.BytesIO(response.content)).convert("RGB")
|
115 |
+
else:
|
116 |
+
image = extract_image_from_pdf(image_url)
|
117 |
+
|
118 |
messages = [
|
119 |
{"role": "user", "content": [
|
120 |
{"type": "image"}, # Specify that an image is provided
|
|
|
240 |
"'Course Name:'\n"
|
241 |
"'Course Description:'\n"
|
242 |
"'Course Credits:'\n"
|
243 |
+
"'Course Learning Outcomes:'\n"
|
244 |
"'Delivery Method:'\n"
|
245 |
"'Prerequisite(s):'\n"
|
246 |
"'Co-requisite(s):'\n"
|
|
|
304 |
if data["img_url"] is not None:
|
305 |
prompt_skills = f"{PROMPT_IMAGE}\n"
|
306 |
img_url = data["img_url"]
|
307 |
+
file_pref = data["file_pref"]
|
308 |
+
response_image = predict_image(img_url, prompt_skills, file_pref)
|
309 |
else:
|
310 |
response_image = ''
|
311 |
|