khurrameycon committed on
Commit
db40c74
·
verified ·
1 Parent(s): fddb1a0

prompt improved

Browse files
Files changed (1) hide show
  1. app.py +70 -7
app.py CHANGED
@@ -214,7 +214,47 @@ PROMPT_SKILLS = (
214
  )
215
 
216
 
217
- PROMPT_IMAGE = (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
219
  "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
220
  "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
@@ -229,7 +269,23 @@ PROMPT_IMAGE = (
229
  ' "cumulative_gpa": "string",\n'
230
  ' "program": "string",\n'
231
  ' "status": "string"\n'
232
- ' },\n'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  ' "courses": [\n'
234
  ' {\n'
235
  ' "transfer_institution": "string",\n'
@@ -246,14 +302,15 @@ PROMPT_IMAGE = (
246
  " ]\n"
247
  "}\n\n"
248
  "Instructions:\n\n"
249
- "1. Extract the student information and course details as displayed in the image.\n"
250
  "2. Use null for any missing or unavailable information.\n"
251
  "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
252
- "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
253
- "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
254
  )
255
 
256
 
 
257
  @app.route("/", methods=["GET"])
258
  def home():
259
  return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
@@ -287,10 +344,16 @@ def extract_info():
287
  response_skills = ''
288
 
289
  if data["img_url"] is not None:
290
- prompt_skills = f"{PROMPT_IMAGE}\n"
 
291
  img_url = data["img_url"]
292
  file_pref = data["file_pref"]
293
- response_image = predict_image(img_url, prompt_skills, file_pref)
 
 
 
 
 
294
  else:
295
  response_image = ''
296
 
 
214
  )
215
 
216
 
217
+ # PROMPT_IMAGE = (
218
+ # "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
219
+ # "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
220
+ # "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
221
+ # "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
222
+ # "Required JSON Format:\n\n"
223
+ # "{\n"
224
+ # ' "student": {\n'
225
+ # ' "name": "string",\n'
226
+ # ' "id": "string",\n'
227
+ # ' "dob": "string",\n'
228
+ # ' "original_start_date": "string",\n'
229
+ # ' "cumulative_gpa": "string",\n'
230
+ # ' "program": "string",\n'
231
+ # ' "status": "string"\n'
232
+ # ' },\n'
233
+ # ' "courses": [\n'
234
+ # ' {\n'
235
+ # ' "transfer_institution": "string",\n'
236
+ # ' "course_code": "string",\n'
237
+ # ' "course_name": "string",\n'
238
+ # ' "credits_attempted": number,\n'
239
+ # ' "credits_earned": number,\n'
240
+ # ' "grade": "string",\n'
241
+ # ' "quality_points": number,\n'
242
+ # ' "semester_code": "string",\n'
243
+ # ' "semester_dates": "string"\n'
244
+ # ' }\n'
245
+ # " // Additional courses can be added here\n"
246
+ # " ]\n"
247
+ # "}\n\n"
248
+ # "Instructions:\n\n"
249
+ # "1. Extract the student information and course details as displayed in the image.\n"
250
+ # "2. Use null for any missing or unavailable information.\n"
251
+ # "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
252
+ # "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
253
+ # "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
254
+ # )
255
+
256
+
257
+ PROMPT_IMAGE_STUDENT = (
258
  "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
259
  "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
260
  "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
 
269
  ' "cumulative_gpa": "string",\n'
270
  ' "program": "string",\n'
271
  ' "status": "string"\n'
272
+ ' }\n'
273
+ "}\n\n"
274
+ "Instructions:\n\n"
275
+ "1. Extract the student's general information as displayed in the image.\n"
276
+ "2. Use null for any missing or unavailable information.\n"
277
+ "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
278
+ "4. Ensure accurate field names and proper nesting.\n"
279
+ "5. Return only the 'student' section as JSON.\n"
280
+ )
281
+
282
+ PROMPT_IMAGE_COURSES = (
283
+ "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
284
+ "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
285
+ "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
286
+ "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
287
+ "Required JSON Format:\n\n"
288
+ "{\n"
289
  ' "courses": [\n'
290
  ' {\n'
291
  ' "transfer_institution": "string",\n'
 
302
  " ]\n"
303
  "}\n\n"
304
  "Instructions:\n\n"
305
+ "1. Extract the course details as displayed in the image.\n"
306
  "2. Use null for any missing or unavailable information.\n"
307
  "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
308
+ "4. Ensure accurate field names and proper nesting.\n"
309
+ "5. Return only the 'courses' section as JSON.\n"
310
  )
311
 
312
 
313
+
314
  @app.route("/", methods=["GET"])
315
  def home():
316
  return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
 
344
  response_skills = ''
345
 
346
  if data["img_url"] is not None:
347
+ prompt_student = f"{PROMPT_IMAGE_STUDENT}\n"
348
+ prompt_courses = f"{PROMPT_IMAGE_COURSES}\n"
349
  img_url = data["img_url"]
350
  file_pref = data["file_pref"]
351
+ response_student = predict_image(img_url, prompt_student, file_pref)
352
+ response_courses = predict_image(img_url, prompt_courses, file_pref)
353
+ # response_image = response_student + response_courses
354
+
355
+ response_image = {"student": response_student.get("student", {}), "courses": response_courses.get("courses", [])}
356
+
357
  else:
358
  response_image = ''
359