khurrameycon committed
Commit 09fd8b1 · verified · 1 Parent(s): d228bc3

updated the prompt for Transcript

Files changed (1):
  app.py  +41 -75
app.py CHANGED
@@ -41,30 +41,6 @@ model = MllamaForConditionalGeneration.from_pretrained(
 # model.to(device)
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
 
-# @spaces.GPU  # Use the free GPU provided by Hugging Face Spaces
-# def predict(image, text):
-#     # Prepare the input messages
-#     messages = [
-#         {"role": "user", "content": [
-#             {"type": "image"},  # Specify that an image is provided
-#             {"type": "text", "text": text}  # Add the user-provided text input
-#         ]}
-#     ]
-
-#     # Create the input text using the processor's chat template
-#     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-
-#     # Process the inputs and move to the appropriate device
-#     inputs = processor(image, input_text, return_tensors="pt").to(device)
-
-#     # Generate a response from the model
-#     outputs = model.generate(**inputs, max_new_tokens=100)
-
-#     # Decode the output to return the final response
-#     response = processor.decode(outputs[0], skip_special_tokens=True)
-#     return response
-
-
 def extract_image_from_pdf(pdf_url, dpi=75):
     """
     Extract first page of PDF as image in memory
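
The commented-out `predict` helper removed above duplicated the single-image inference path that the live `predict_image` function still follows: build the chat messages, render them with `apply_chat_template`, run the processor on image plus text, then `generate` and `decode`. A minimal sketch of that flow for reference; it assumes the `model` and `processor` objects loaded at the top of app.py, and the helper name `run_image_prompt` is illustrative rather than part of the repository:

```python
# Illustrative sketch only -- not code from this commit. Assumes `model` and
# `processor` are the Mllama model and AutoProcessor loaded earlier in app.py.
from PIL import Image


def run_image_prompt(image: Image.Image, text: str, max_new_tokens: int = 100) -> str:
    messages = [
        {"role": "user", "content": [
            {"type": "image"},               # the image itself is handed to the processor below
            {"type": "text", "text": text},  # user-provided instruction
        ]}
    ]
    # Render the chat template, then tokenize the text together with the image.
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(image, input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Decode the full sequence (prompt + completion) back to text.
    return processor.decode(outputs[0], skip_special_tokens=True)
```
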
@@ -133,41 +109,6 @@ def predict_image(image_url, text, file_pref):
     # Decode the output to return the final response
     response = processor.decode(outputs[0], skip_special_tokens=True)
 
-    # # Prepare the input messages
-    # messages = [
-    #     {"role": "user", "content": [
-    #         {"type": "image"},  # Specify that an image is provided
-    #         {"type": "text", "text": text}  # Add the user-provided text input
-    #     ]}
-    # ]
-
-    # # Create the input text using the processor's chat template
-    # input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-
-    # # Process the inputs and move to the appropriate device
-    # inputs = processor(image=image, text=input_text, return_tensors="pt").to("cuda")
-
-    # Generate a response from the model
-    # outputs = model.generate(**inputs, max_new_tokens=100)
-
-    # # Decode the output to return the final response
-    # response = processor.decode(outputs[0], skip_special_tokens=True)
-
-    # streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
-
-    # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-    # generated_text = ""
-
-    # thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    # thread.start()
-    # buffer = ""
-
-    # for new_text in streamer:
-    #     buffer += new_text
-    #     # generated_text_without_prompt = buffer
-    #     # # time.sleep(0.01)
-    #     # yield buffer
-
     # return buffer
     return response
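
The deleted comments also sketched a streaming variant: `model.generate` runs in a background `Thread` while partial text is read from a `TextIteratorStreamer`. A hedged reconstruction of that pattern, again assuming the `model` and `processor` globals; the generator name `stream_generation` and the final `thread.join()` are additions for illustration:

```python
# Illustrative reconstruction of the streaming path described in the removed comments.
# `inputs` is expected to be the dict returned by the processor call above.
from threading import Thread

from transformers import TextIteratorStreamer


def stream_generation(inputs, max_new_tokens: int = 2048):
    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    # generate() blocks, so it runs in a worker thread while we consume the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # emit the growing transcript for incremental UI updates
    thread.join()
```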
 
@@ -258,20 +199,42 @@ PROMPT_SKILLS = (
 
 
 PROMPT_IMAGE = (
-    "Extract the following information from this image:\n"
-    "'Student Name'\n"
-    "'Transfer Institution'\n"
-    "'Course Code'\n"
-    "'Course Name'\n"
-    "'Credits Attempted'\n"
-    "'Credits Earned'\n"
-    "'Grade'\n"
-    "'Quality Points'\n"
-    "'Semester Code'\n"
-    "'Semester Dates'\n"
-    "'Program or Major'\n"
-    "'Cumulative GPA'\n"
-    "Only provide the requested information without adding any extra details."
+    "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
+    "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
+    "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
+    "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
+    "Required JSON Format:\n\n"
+    "{\n"
+    '  "student": {\n'
+    '    "name": "string",\n'
+    '    "id": "string",\n'
+    '    "dob": "string",\n'
+    '    "original_start_date": "string",\n'
+    '    "cumulative_gpa": "string",\n'
+    '    "program": "string",\n'
+    '    "status": "string"\n'
+    '  },\n'
+    '  "courses": [\n'
+    '    {\n'
+    '      "transfer_institution": "string",\n'
+    '      "course_code": "string",\n'
+    '      "course_name": "string",\n'
+    '      "credits_attempted": number,\n'
+    '      "credits_earned": number,\n'
+    '      "grade": "string",\n'
+    '      "quality_points": number,\n'
+    '      "semester_code": "string",\n'
+    '      "semester_dates": "string"\n'
+    '    }\n'
+    "    // Additional courses can be added here\n"
+    "  ]\n"
+    "}\n\n"
+    "Instructions:\n\n"
+    "1. Extract the student information and course details as displayed in the image.\n"
+    "2. Use null for any missing or unavailable information.\n"
+    "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
+    "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
+    "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
 )
 
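
The new `PROMPT_IMAGE` asks for one strict JSON object, with `null` standing in for missing fields. Note that the required format itself embeds a `// Additional courses can be added here` placeholder, which is not legal JSON if the model echoes it back, so the caller may want a tolerant parser. A hypothetical helper (not part of this commit) that extracts the object from the raw reply and drops such comment lines before parsing:

```python
import json
import re


def parse_transcript_json(raw_response: str):
    """Hypothetical helper: pull the JSON object out of the model's reply and parse it."""
    # Take everything from the first '{' to the last '}' in case the model adds prose around it.
    match = re.search(r"\{.*\}", raw_response, flags=re.DOTALL)
    if not match:
        return None
    candidate = match.group(0)
    # Drop any "// ..." comment-only lines copied from the prompt's example format.
    candidate = re.sub(r"^\s*//.*$", "", candidate, flags=re.MULTILINE)
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        return None
```

Numeric fields such as `credits_attempted` and `quality_points` can then be validated on the parsed dict.
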
 
@@ -299,8 +262,11 @@ def extract_info():
     response = ''
 
     if data["skills"] == True:
-        prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
-        response_skills = predict_text(prompt_skills)
+        if response:
+            prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
+            response_skills = predict_text(prompt_skills)
+        else:
+            response_skills = ''
     else:
         response_skills = ''
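
The added guard only builds the skills prompt when a transcript `response` actually exists, falling back to an empty string otherwise. A flatter variant of the same idea, shown only as a hypothetical alternative rather than what the commit does (note it treats any truthy `skills` value as enabled instead of requiring `== True`):

```python
# Hypothetical compaction of the new guard; `data`, `response`, `PROMPT_SKILLS` and
# `predict_text` are the names already used inside extract_info().
response_skills = ''
if data.get("skills") and response:
    prompt_skills = f"{PROMPT_SKILLS} using this information only -- {response}"
    response_skills = predict_text(prompt_skills)
```

Using `data.get("skills")` instead of `data["skills"] == True` also avoids a `KeyError` when the flag is missing from `data`.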
 
 