prompt update
Browse files - mineru_single.py +7 -4
mineru_single.py
CHANGED
@@ -181,6 +181,7 @@ class ImageWriter(DataWriter):
|
|
181 |
def call_gemini_for_image_description(image_data: bytes) -> str:
|
182 |
"""Convert image bytes to Gemini-compatible format and get description"""
|
183 |
from google import genai
|
|
|
184 |
import base64
|
185 |
|
186 |
try:
|
@@ -193,16 +194,18 @@ def call_gemini_for_image_description(image_data: bytes) -> str:
|
|
193 |
contents=[
|
194 |
{
|
195 |
"parts": [
|
196 |
-
{"text": """The provided image is a part of a question paper or markscheme. Extract all the necessary information from the image to be able to identify the question.
|
197 |
-
|
198 |
-
|
|
|
199 |
{
|
200 |
"inline_data": {
|
201 |
"mime_type": "image/jpeg",
|
202 |
"data": base64.b64encode(image_data).decode('utf-8')
|
203 |
}
|
204 |
}
|
205 |
-
]
|
|
|
206 |
}
|
207 |
]
|
208 |
)
|
|
|
181 |
def call_gemini_for_image_description(image_data: bytes) -> str:
|
182 |
"""Convert image bytes to Gemini-compatible format and get description"""
|
183 |
from google import genai
|
184 |
+
from google.genai import types
|
185 |
import base64
|
186 |
|
187 |
try:
|
|
|
194 |
contents=[
|
195 |
{
|
196 |
"parts": [
|
197 |
+
{"text": """The provided image is a part of a question paper or markscheme. Extract all the necessary information from the image to be able to identify the question.
|
198 |
+
To identify the question, we only need the following: question number and question part. Don't include redundant information.
|
199 |
+
For example, if image contains text like: "Q1 Part A Answer: Life on earth was created by diety..." you should return just "Q1 Part A Mark Scheme"
|
200 |
+
If there is no text on this image, return the description of the image. 20 words max."""},
|
201 |
{
|
202 |
"inline_data": {
|
203 |
"mime_type": "image/jpeg",
|
204 |
"data": base64.b64encode(image_data).decode('utf-8')
|
205 |
}
|
206 |
}
|
207 |
+
],
|
208 |
+
config=types.GenerateContentConfig(temperature=0.)
|
209 |
}
|
210 |
]
|
211 |
)
|