Spaces:
Sleeping
Sleeping
Add exception handlers for GPT and Claude
Browse files
- extractors/model.py +55 -52
extractors/model.py
CHANGED
@@ -188,36 +188,39 @@ class GPTModel(Model):
|
|
188 |
str: The extracted data.
|
189 |
"""
|
190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
{
|
207 |
-
"role": "user",
|
208 |
-
"content": [
|
209 |
-
{"type": "text", "text": "Convert this image to markdown"},
|
210 |
-
*contents,
|
211 |
-
],
|
212 |
-
}
|
213 |
-
]
|
214 |
-
|
215 |
-
response = self._client.chat.completions.create(
|
216 |
-
model=self.MODEL,
|
217 |
-
messages=messages,
|
218 |
-
)
|
219 |
|
220 |
-
|
|
|
|
|
|
|
221 |
|
222 |
class ClaudeModel(Model):
|
223 |
BASE_URL = "http://103.114.163.134:3000/v1/"
|
@@ -247,31 +250,31 @@ class ClaudeModel(Model):
|
|
247 |
str: The extracted data.
|
248 |
"""
|
249 |
|
250 |
-
prompt = "Convert this image to markdown."
|
251 |
-
pdf_preprocessor = PdfPreprocessor()
|
252 |
-
claude_postprocessor = ClaudePostprocessor()
|
253 |
-
file_contents = pdf_preprocessor.run(file_path)
|
254 |
-
|
255 |
-
contents = []
|
256 |
-
for content in file_contents:
|
257 |
-
contents.append(
|
258 |
-
{
|
259 |
-
"type": "image",
|
260 |
-
"source": {
|
261 |
-
"type": "base64",
|
262 |
-
"media_type": "image/jpeg",
|
263 |
-
"data": content,
|
264 |
-
}
|
265 |
-
})
|
266 |
-
|
267 |
-
messages = [
|
268 |
-
{"role": "user", "content": [
|
269 |
-
{"type": "text", "text": prompt},
|
270 |
-
*contents,
|
271 |
-
]}
|
272 |
-
]
|
273 |
-
|
274 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
response = self._client.messages.create(
|
276 |
model="claude-3-5-sonnet-20240620", max_tokens=1024, messages=messages
|
277 |
)
|
|
|
188 |
str: The extracted data.
|
189 |
"""
|
190 |
|
191 |
+
try:
|
192 |
+
pdf_preprocessor = PdfPreprocessor()
|
193 |
+
gpt_postprocessor = GPTPostprocessor()
|
194 |
+
file_contents = pdf_preprocessor.run(file_path)
|
195 |
+
contents = []
|
196 |
+
for content in file_contents:
|
197 |
+
contents.append(
|
198 |
+
{
|
199 |
+
"type": "image_url",
|
200 |
+
"image_url": {
|
201 |
+
"url": f"data:image/jpeg;base64,{content}",
|
202 |
+
},
|
203 |
+
})
|
204 |
|
205 |
+
messages = [
|
206 |
+
{
|
207 |
+
"role": "user",
|
208 |
+
"content": [
|
209 |
+
{"type": "text", "text": "Convert this image to markdown"},
|
210 |
+
*contents,
|
211 |
+
],
|
212 |
+
}
|
213 |
+
]
|
214 |
+
|
215 |
+
response = self._client.chat.completions.create(
|
216 |
+
model=self.MODEL,
|
217 |
+
messages=messages,
|
218 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
+
return gpt_postprocessor.run(response.choices[0].message.content)
|
221 |
+
except Exception as e:
|
222 |
+
print(f"Error processing input: {str(e)}")
|
223 |
+
return f"Error processing with GPTModel: {str(e)}"
|
224 |
|
225 |
class ClaudeModel(Model):
|
226 |
BASE_URL = "http://103.114.163.134:3000/v1/"
|
|
|
250 |
str: The extracted data.
|
251 |
"""
|
252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
try:
|
254 |
+
prompt = "Convert this image to markdown."
|
255 |
+
pdf_preprocessor = PdfPreprocessor()
|
256 |
+
claude_postprocessor = ClaudePostprocessor()
|
257 |
+
file_contents = pdf_preprocessor.run(file_path)
|
258 |
+
|
259 |
+
contents = []
|
260 |
+
for content in file_contents:
|
261 |
+
contents.append(
|
262 |
+
{
|
263 |
+
"type": "image",
|
264 |
+
"source": {
|
265 |
+
"type": "base64",
|
266 |
+
"media_type": "image/jpeg",
|
267 |
+
"data": content,
|
268 |
+
}
|
269 |
+
})
|
270 |
+
|
271 |
+
messages = [
|
272 |
+
{"role": "user", "content": [
|
273 |
+
{"type": "text", "text": prompt},
|
274 |
+
*contents,
|
275 |
+
]}
|
276 |
+
]
|
277 |
+
|
278 |
response = self._client.messages.create(
|
279 |
model="claude-3-5-sonnet-20240620", max_tokens=1024, messages=messages
|
280 |
)
|