jojortz committed on
Commit
3966ab6
1 Parent(s): a9ae960

add exception handlers for GPT and Claude

Browse files
Files changed (1) hide show
  1. extractors/model.py +55 -52
extractors/model.py CHANGED
@@ -188,36 +188,39 @@ class GPTModel(Model):
188
  str: The extracted data.
189
  """
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- pdf_preprocessor = PdfPreprocessor()
193
- gpt_postprocessor = GPTPostprocessor()
194
- file_contents = pdf_preprocessor.run(file_path)
195
- contents = []
196
- for content in file_contents:
197
- contents.append(
198
- {
199
- "type": "image_url",
200
- "image_url": {
201
- "url": f"data:image/jpeg;base64,{content}",
202
- },
203
- })
204
-
205
- messages = [
206
- {
207
- "role": "user",
208
- "content": [
209
- {"type": "text", "text": "Convert this image to markdown"},
210
- *contents,
211
- ],
212
- }
213
- ]
214
-
215
- response = self._client.chat.completions.create(
216
- model=self.MODEL,
217
- messages=messages,
218
- )
219
 
220
- return gpt_postprocessor.run(response.choices[0].message.content)
 
 
 
221
 
222
  class ClaudeModel(Model):
223
  BASE_URL = "http://103.114.163.134:3000/v1/"
@@ -247,31 +250,31 @@ class ClaudeModel(Model):
247
  str: The extracted data.
248
  """
249
 
250
- prompt = "Convert this image to markdown."
251
- pdf_preprocessor = PdfPreprocessor()
252
- claude_postprocessor = ClaudePostprocessor()
253
- file_contents = pdf_preprocessor.run(file_path)
254
-
255
- contents = []
256
- for content in file_contents:
257
- contents.append(
258
- {
259
- "type": "image",
260
- "source": {
261
- "type": "base64",
262
- "media_type": "image/jpeg",
263
- "data": content,
264
- }
265
- })
266
-
267
- messages = [
268
- {"role": "user", "content": [
269
- {"type": "text", "text": prompt},
270
- *contents,
271
- ]}
272
- ]
273
-
274
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  response = self._client.messages.create(
276
  model="claude-3-5-sonnet-20240620", max_tokens=1024, messages=messages
277
  )
 
188
  str: The extracted data.
189
  """
190
 
191
+ try:
192
+ pdf_preprocessor = PdfPreprocessor()
193
+ gpt_postprocessor = GPTPostprocessor()
194
+ file_contents = pdf_preprocessor.run(file_path)
195
+ contents = []
196
+ for content in file_contents:
197
+ contents.append(
198
+ {
199
+ "type": "image_url",
200
+ "image_url": {
201
+ "url": f"data:image/jpeg;base64,{content}",
202
+ },
203
+ })
204
 
205
+ messages = [
206
+ {
207
+ "role": "user",
208
+ "content": [
209
+ {"type": "text", "text": "Convert this image to markdown"},
210
+ *contents,
211
+ ],
212
+ }
213
+ ]
214
+
215
+ response = self._client.chat.completions.create(
216
+ model=self.MODEL,
217
+ messages=messages,
218
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
+ return gpt_postprocessor.run(response.choices[0].message.content)
221
+ except Exception as e:
222
+ print(f"Error processing input: {str(e)}")
223
+ return f"Error processing with GPTModel: {str(e)}"
224
 
225
  class ClaudeModel(Model):
226
  BASE_URL = "http://103.114.163.134:3000/v1/"
 
250
  str: The extracted data.
251
  """
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  try:
254
+ prompt = "Convert this image to markdown."
255
+ pdf_preprocessor = PdfPreprocessor()
256
+ claude_postprocessor = ClaudePostprocessor()
257
+ file_contents = pdf_preprocessor.run(file_path)
258
+
259
+ contents = []
260
+ for content in file_contents:
261
+ contents.append(
262
+ {
263
+ "type": "image",
264
+ "source": {
265
+ "type": "base64",
266
+ "media_type": "image/jpeg",
267
+ "data": content,
268
+ }
269
+ })
270
+
271
+ messages = [
272
+ {"role": "user", "content": [
273
+ {"type": "text", "text": prompt},
274
+ *contents,
275
+ ]}
276
+ ]
277
+
278
  response = self._client.messages.create(
279
  model="claude-3-5-sonnet-20240620", max_tokens=1024, messages=messages
280
  )