princhman committed on
Commit
e1710da
·
verified ·
1 Parent(s): ac1404a

prompt update

Browse files
Files changed (1) hide show
  1. mineru_single.py +7 -4
mineru_single.py CHANGED
@@ -181,6 +181,7 @@ class ImageWriter(DataWriter):
181
  def call_gemini_for_image_description(image_data: bytes) -> str:
182
  """Convert image bytes to Gemini-compatible format and get description"""
183
  from google import genai
 
184
  import base64
185
 
186
  try:
@@ -193,16 +194,18 @@ def call_gemini_for_image_description(image_data: bytes) -> str:
193
  contents=[
194
  {
195
  "parts": [
196
- {"text": """The provided image is a part of a question paper or markscheme. Extract all the necessary information from the image to be able to identify the question.
197
- For example, if there is an image that contains text like: "Q1 Part A Answer: Life on earth was created by diety..." you should return "Q1 Part A Answer"
198
- If there is no text on this image, return the description of the image. 20 words max."""},
 
199
  {
200
  "inline_data": {
201
  "mime_type": "image/jpeg",
202
  "data": base64.b64encode(image_data).decode('utf-8')
203
  }
204
  }
205
- ]
 
206
  }
207
  ]
208
  )
 
181
  def call_gemini_for_image_description(image_data: bytes) -> str:
182
  """Convert image bytes to Gemini-compatible format and get description"""
183
  from google import genai
184
+ from google.genai import types
185
  import base64
186
 
187
  try:
 
194
  contents=[
195
  {
196
  "parts": [
197
+ {"text": """The provided image is a part of a question paper or markscheme. Extract all the necessary information from the image to be able to identify the question.
198
+ To identify the question, we only need the following: question number and question part. Don't include redundant information.
199
+ For example, if image contains text like: "Q1 Part A Answer: Life on earth was created by diety..." you should return just "Q1 Part A Mark Scheme"
200
+ If there is no text on this image, return the description of the image. 20 words max."""},
201
  {
202
  "inline_data": {
203
  "mime_type": "image/jpeg",
204
  "data": base64.b64encode(image_data).decode('utf-8')
205
  }
206
  }
207
+ ],
208
+ config=types.GenerateContentConfig(temperature=0.)
209
  }
210
  ]
211
  )