Spaces:
Sleeping
Sleeping
updated the prompt for Transcript
Browse files
app.py
CHANGED
@@ -41,30 +41,6 @@ model = MllamaForConditionalGeneration.from_pretrained(
|
|
41 |
# model.to(device)
|
42 |
processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
|
43 |
|
44 |
-
# @spaces.GPU # Use the free GPU provided by Hugging Face Spaces
|
45 |
-
# def predict(image, text):
|
46 |
-
# # Prepare the input messages
|
47 |
-
# messages = [
|
48 |
-
# {"role": "user", "content": [
|
49 |
-
# {"type": "image"}, # Specify that an image is provided
|
50 |
-
# {"type": "text", "text": text} # Add the user-provided text input
|
51 |
-
# ]}
|
52 |
-
# ]
|
53 |
-
|
54 |
-
# # Create the input text using the processor's chat template
|
55 |
-
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
56 |
-
|
57 |
-
# # Process the inputs and move to the appropriate device
|
58 |
-
# inputs = processor(image, input_text, return_tensors="pt").to(device)
|
59 |
-
|
60 |
-
# # Generate a response from the model
|
61 |
-
# outputs = model.generate(**inputs, max_new_tokens=100)
|
62 |
-
|
63 |
-
# # Decode the output to return the final response
|
64 |
-
# response = processor.decode(outputs[0], skip_special_tokens=True)
|
65 |
-
# return response
|
66 |
-
|
67 |
-
|
68 |
def extract_image_from_pdf(pdf_url, dpi=75):
|
69 |
"""
|
70 |
Extract first page of PDF as image in memory
|
@@ -133,41 +109,6 @@ def predict_image(image_url, text, file_pref):
|
|
133 |
# Decode the output to return the final response
|
134 |
response = processor.decode(outputs[0], skip_special_tokens=True)
|
135 |
|
136 |
-
# # Prepare the input messages
|
137 |
-
# messages = [
|
138 |
-
# {"role": "user", "content": [
|
139 |
-
# {"type": "image"}, # Specify that an image is provided
|
140 |
-
# {"type": "text", "text": text} # Add the user-provided text input
|
141 |
-
# ]}
|
142 |
-
# ]
|
143 |
-
|
144 |
-
# # Create the input text using the processor's chat template
|
145 |
-
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
146 |
-
|
147 |
-
# # Process the inputs and move to the appropriate device
|
148 |
-
# inputs = processor(image=image, text=input_text, return_tensors="pt").to("cuda")
|
149 |
-
|
150 |
-
# Generate a response from the model
|
151 |
-
# outputs = model.generate(**inputs, max_new_tokens=100)
|
152 |
-
|
153 |
-
# # Decode the output to return the final response
|
154 |
-
# response = processor.decode(outputs[0], skip_special_tokens=True)
|
155 |
-
|
156 |
-
# streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
|
157 |
-
|
158 |
-
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
|
159 |
-
# generated_text = ""
|
160 |
-
|
161 |
-
# thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
162 |
-
# thread.start()
|
163 |
-
# buffer = ""
|
164 |
-
|
165 |
-
# for new_text in streamer:
|
166 |
-
# buffer += new_text
|
167 |
-
# # generated_text_without_prompt = buffer
|
168 |
-
# # # time.sleep(0.01)
|
169 |
-
# # yield buffer
|
170 |
-
|
171 |
# return buffer
|
172 |
return response
|
173 |
|
@@ -258,20 +199,42 @@ PROMPT_SKILLS = (
|
|
258 |
|
259 |
|
260 |
PROMPT_IMAGE = (
|
261 |
-
"
|
262 |
-
"'
|
263 |
-
"
|
264 |
-
"
|
265 |
-
"
|
266 |
-
"
|
267 |
-
"
|
268 |
-
"'
|
269 |
-
"
|
270 |
-
"
|
271 |
-
"
|
272 |
-
"
|
273 |
-
"
|
274 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
)
|
276 |
|
277 |
|
@@ -299,8 +262,11 @@ def extract_info():
|
|
299 |
response = ''
|
300 |
|
301 |
if data["skills"] == True:
|
302 |
-
|
303 |
-
|
|
|
|
|
|
|
304 |
else:
|
305 |
response_skills = ''
|
306 |
|
|
|
41 |
# model.to(device)
|
42 |
processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def extract_image_from_pdf(pdf_url, dpi=75):
|
45 |
"""
|
46 |
Extract first page of PDF as image in memory
|
|
|
109 |
# Decode the output to return the final response
|
110 |
response = processor.decode(outputs[0], skip_special_tokens=True)
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
# return buffer
|
113 |
return response
|
114 |
|
|
|
199 |
|
200 |
|
201 |
PROMPT_IMAGE = (
|
202 |
+
"You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
203 |
+
"Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
204 |
+
"If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
205 |
+
"Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
|
206 |
+
"Required JSON Format:\n\n"
|
207 |
+
"{\n"
|
208 |
+
' "student": {\n'
|
209 |
+
' "name": "string",\n'
|
210 |
+
' "id": "string",\n'
|
211 |
+
' "dob": "string",\n'
|
212 |
+
' "original_start_date": "string",\n'
|
213 |
+
' "cumulative_gpa": "string",\n'
|
214 |
+
' "program": "string",\n'
|
215 |
+
' "status": "string"\n'
|
216 |
+
' },\n'
|
217 |
+
' "courses": [\n'
|
218 |
+
' {\n'
|
219 |
+
' "transfer_institution": "string",\n'
|
220 |
+
' "course_code": "string",\n'
|
221 |
+
' "course_name": "string",\n'
|
222 |
+
' "credits_attempted": number,\n'
|
223 |
+
' "credits_earned": number,\n'
|
224 |
+
' "grade": "string",\n'
|
225 |
+
' "quality_points": number,\n'
|
226 |
+
' "semester_code": "string",\n'
|
227 |
+
' "semester_dates": "string"\n'
|
228 |
+
' }\n'
|
229 |
+
" // Additional courses can be added here\n"
|
230 |
+
" ]\n"
|
231 |
+
"}\n\n"
|
232 |
+
"Instructions:\n\n"
|
233 |
+
"1. Extract the student information and course details as displayed in the image.\n"
|
234 |
+
"2. Use null for any missing or unavailable information.\n"
|
235 |
+
"3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
236 |
+
"4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
|
237 |
+
"5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
|
238 |
)
|
239 |
|
240 |
|
|
|
262 |
response = ''
|
263 |
|
264 |
if data["skills"] == True:
|
265 |
+
if response:
|
266 |
+
prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
|
267 |
+
response_skills = predict_text(prompt_skills)
|
268 |
+
else:
|
269 |
+
response_skills = ''
|
270 |
else:
|
271 |
response_skills = ''
|
272 |
|