Spaces:
Sleeping
Sleeping
prompt improved
Browse files
app.py
CHANGED
@@ -214,7 +214,47 @@ PROMPT_SKILLS = (
|
|
214 |
)
|
215 |
|
216 |
|
217 |
-
PROMPT_IMAGE = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
"You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
219 |
"Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
220 |
"If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
@@ -229,7 +269,23 @@ PROMPT_IMAGE = (
|
|
229 |
' "cumulative_gpa": "string",\n'
|
230 |
' "program": "string",\n'
|
231 |
' "status": "string"\n'
|
232 |
-
' },\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
' "courses": [\n'
|
234 |
' {\n'
|
235 |
' "transfer_institution": "string",\n'
|
@@ -246,14 +302,15 @@ PROMPT_IMAGE = (
|
|
246 |
" ]\n"
|
247 |
"}\n\n"
|
248 |
"Instructions:\n\n"
|
249 |
-
"1. Extract the student information and course details as displayed in the image.\n"
|
250 |
"2. Use null for any missing or unavailable information.\n"
|
251 |
"3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
252 |
-
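"4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"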
|
253 |
-
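"5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"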
|
254 |
)
|
255 |
|
256 |
|
|
|
257 |
@app.route("/", methods=["GET"])
|
258 |
def home():
|
259 |
return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
|
@@ -287,10 +344,16 @@ def extract_info():
|
|
287 |
response_skills = ''
|
288 |
|
289 |
if data["img_url"] is not None:
|
290 |
-
|
|
|
291 |
img_url = data["img_url"]
|
292 |
file_pref = data["file_pref"]
|
293 |
-
|
|
|
|
|
|
|
|
|
|
|
294 |
else:
|
295 |
response_image = ''
|
296 |
|
|
|
214 |
)
|
215 |
|
216 |
|
217 |
+
# PROMPT_IMAGE = (
|
218 |
+
# "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
219 |
+
# "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
220 |
+
# "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
221 |
+
# "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
|
222 |
+
# "Required JSON Format:\n\n"
|
223 |
+
# "{\n"
|
224 |
+
# ' "student": {\n'
|
225 |
+
# ' "name": "string",\n'
|
226 |
+
# ' "id": "string",\n'
|
227 |
+
# ' "dob": "string",\n'
|
228 |
+
# ' "original_start_date": "string",\n'
|
229 |
+
# ' "cumulative_gpa": "string",\n'
|
230 |
+
# ' "program": "string",\n'
|
231 |
+
# ' "status": "string"\n'
|
232 |
+
# ' },\n'
|
233 |
+
# ' "courses": [\n'
|
234 |
+
# ' {\n'
|
235 |
+
# ' "transfer_institution": "string",\n'
|
236 |
+
# ' "course_code": "string",\n'
|
237 |
+
# ' "course_name": "string",\n'
|
238 |
+
# ' "credits_attempted": number,\n'
|
239 |
+
# ' "credits_earned": number,\n'
|
240 |
+
# ' "grade": "string",\n'
|
241 |
+
# ' "quality_points": number,\n'
|
242 |
+
# ' "semester_code": "string",\n'
|
243 |
+
# ' "semester_dates": "string"\n'
|
244 |
+
# ' }\n'
|
245 |
+
# " // Additional courses can be added here\n"
|
246 |
+
# " ]\n"
|
247 |
+
# "}\n\n"
|
248 |
+
# "Instructions:\n\n"
|
249 |
+
# "1. Extract the student information and course details as displayed in the image.\n"
|
250 |
+
# "2. Use null for any missing or unavailable information.\n"
|
251 |
+
# "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
252 |
+
# "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
|
253 |
+
# "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
|
254 |
+
# )
|
255 |
+
|
256 |
+
|
257 |
+
PROMPT_IMAGE_STUDENT = (
|
258 |
"You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
259 |
"Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
260 |
"If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
|
|
269 |
' "cumulative_gpa": "string",\n'
|
270 |
' "program": "string",\n'
|
271 |
' "status": "string"\n'
|
272 |
+
' }\n'
|
273 |
+
"}\n\n"
|
274 |
+
"Instructions:\n\n"
|
275 |
+
"1. Extract the student's general information as displayed in the image.\n"
|
276 |
+
"2. Use null for any missing or unavailable information.\n"
|
277 |
+
"3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
278 |
+
"4. Ensure accurate field names and proper nesting.\n"
|
279 |
+
"5. Return only the 'student' section as JSON.\n"
|
280 |
+
)
|
281 |
+
|
282 |
+
PROMPT_IMAGE_COURSES = (
|
283 |
+
"You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
|
284 |
+
"Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
|
285 |
+
"If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
|
286 |
+
"Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
|
287 |
+
"Required JSON Format:\n\n"
|
288 |
+
"{\n"
|
289 |
' "courses": [\n'
|
290 |
' {\n'
|
291 |
' "transfer_institution": "string",\n'
|
|
|
302 |
" ]\n"
|
303 |
"}\n\n"
|
304 |
"Instructions:\n\n"
|
305 |
+
"1. Extract the course details as displayed in the image.\n"
|
306 |
"2. Use null for any missing or unavailable information.\n"
|
307 |
"3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
|
308 |
+
"4. Ensure accurate field names and proper nesting.\n"
|
309 |
+
"5. Return only the 'courses' section as JSON.\n"
|
310 |
)
|
311 |
|
312 |
|
313 |
+
|
314 |
@app.route("/", methods=["GET"])
|
315 |
def home():
|
316 |
return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
|
|
|
344 |
response_skills = ''
|
345 |
|
346 |
if data["img_url"] is not None:
|
347 |
+
prompt_student = f"{PROMPT_IMAGE_STUDENT}\n"
|
348 |
+
prompt_courses = f"{PROMPT_IMAGE_COURSES}\n"
|
349 |
img_url = data["img_url"]
|
350 |
file_pref = data["file_pref"]
|
351 |
+
response_student = predict_image(img_url, prompt_student, file_pref)
|
352 |
+
response_courses = predict_image(img_url, prompt_courses, file_pref)
|
353 |
+
# response_image = response_student + response_courses
|
354 |
+
|
355 |
+
response_image = {"student": response_student.get("student", {}), "courses": response_courses.get("courses", [])}
|
356 |
+
|
357 |
else:
|
358 |
response_image = ''
|
359 |
|