danial0203 committed on
Commit
6e44813
·
verified ·
1 Parent(s): f850e87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -32
app.py CHANGED
@@ -12,6 +12,9 @@ import json
12
  os.system("apt-get update")
13
  os.system("apt-get install poppler-utils")
14
 
 
 
 
15
  # Function to convert PDF to images or open a single image
16
  def get_images(file_path):
17
  images = []
@@ -32,21 +35,25 @@ def encode_image_to_base64(image):
32
  image.save(buffered, format="JPEG")
33
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
34
 
 
35
  def process_files_fixed(image_path, page_identifier, error_pages):
36
- api_key= os.getenv('OPENAI_API_KEY')
37
  headers = {
38
  "Content-Type": "application/json",
39
  "Authorization": f"Bearer {api_key}"
40
  }
41
 
42
- # read the image and create image object
43
- image = Image.open(image_path)
44
- base64_image = encode_image_to_base64(image)
 
 
 
 
45
 
46
  prompt = """Perform OCR on this image. Analyze the table in the provided image, focusing on the first five columns labeled S.No, Reg #, Roll No. and Marks. Get the marks from the 6th column and write them in fifth column in integers, make sure to check them as well for correct integer number, I don't want any mistakes in the obtained marks. In case the table headers are not visible or not present, assume the mentioned order for the columns. Extract and list the data only from these columns, omitting any additional columns that may be present. But DO NOT skip any row from the table, extract all the rows present in the table. The obtained marks are written in both integral and written format as well. Verify both for better ocr in integers.
47
 
48
- Return the response in the following JSoN response format:
49
- ```
50
  {
51
  "data": [
52
  {
@@ -55,16 +62,9 @@ Return the response in the following JSoN response format:
55
  "Roll_No": "2345234",
56
  "Marks": "20"
57
  },
58
- {
59
- "S_No": "2",
60
- "Reg": "059288",
61
- "Roll_No": "2345235",
62
- "Marks": "25"
63
- },
64
  ...
65
  ]
66
- }
67
- ```"""
68
 
69
  payload = {
70
  "model": "gpt-4-vision-preview",
@@ -78,43 +78,43 @@ Return the response in the following JSoN response format:
78
  },
79
  {
80
  "type": "image_url",
81
- "image_url": {
82
- "url": f"data:image/jpeg;base64,{base64_image}",
83
- "detail": "high",
84
- }
85
  }
86
  ]
87
  }
88
  ],
89
- "max_tokens": 4096,
90
  }
91
 
92
- response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
93
-
94
  try:
 
95
  if response.status_code == 200:
96
  json_response = response.json()
97
  response_content = json_response["choices"][0]["message"]["content"]
98
-
99
  if response_content:
100
- json_string = response_content[response_content.find("{"): response_content.rfind("}") + 1]
101
- json_data = json.loads(json_string)
102
- if "data" in json_data and json_data["data"]:
103
- return json_data["data"]
104
- else:
105
- print(f"No records found in page/file: {page_identifier}")
 
 
 
 
 
106
  error_pages.append(page_identifier)
107
  return []
108
  else:
109
- print(f"No content in JSON response for page/file: {page_identifier}")
110
  error_pages.append(page_identifier)
111
  return []
112
  else:
113
- print(f"Error in API call for page/file: {page_identifier}")
114
  error_pages.append(page_identifier)
115
  return []
116
- except Exception as e:
117
- print(f"Exception processing page/file {page_identifier}: {e}")
118
  error_pages.append(page_identifier)
119
  return []
120
 
 
12
  os.system("apt-get update")
13
  os.system("apt-get install poppler-utils")
14
 
15
+ logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
16
+
17
+
18
  # Function to convert PDF to images or open a single image
19
  def get_images(file_path):
20
  images = []
 
35
  image.save(buffered, format="JPEG")
36
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
37
 
38
+
39
  def process_files_fixed(image_path, page_identifier, error_pages):
40
+ api_key = os.getenv('OPENAI_API_KEY')
41
  headers = {
42
  "Content-Type": "application/json",
43
  "Authorization": f"Bearer {api_key}"
44
  }
45
 
46
+ try:
47
+ image = Image.open(image_path)
48
+ base64_image = encode_image_to_base64(image)
49
+ except Exception as e:
50
+ logging.error(f"Failed to process image at {image_path}: {e}")
51
+ error_pages.append(page_identifier)
52
+ return []
53
 
54
  prompt = """Perform OCR on this image. Analyze the table in the provided image, focusing on the first five columns labeled S.No, Reg #, Roll No. and Marks. Get the marks from the 6th column and write them in fifth column in integers, make sure to check them as well for correct integer number, I don't want any mistakes in the obtained marks. In case the table headers are not visible or not present, assume the mentioned order for the columns. Extract and list the data only from these columns, omitting any additional columns that may be present. But DO NOT skip any row from the table, extract all the rows present in the table. The obtained marks are written in both integral and written format as well. Verify both for better ocr in integers.
55
 
56
+ Return the response in the following JSON response format:
 
57
  {
58
  "data": [
59
  {
 
62
  "Roll_No": "2345234",
63
  "Marks": "20"
64
  },
 
 
 
 
 
 
65
  ...
66
  ]
67
+ }"""
 
68
 
69
  payload = {
70
  "model": "gpt-4-vision-preview",
 
78
  },
79
  {
80
  "type": "image_url",
81
+ "image_url": f"data:image/jpeg;base64,{base64_image}"
 
 
 
82
  }
83
  ]
84
  }
85
  ],
86
+ "max_tokens": 4096
87
  }
88
 
 
 
89
  try:
90
+ response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
91
  if response.status_code == 200:
92
  json_response = response.json()
93
  response_content = json_response["choices"][0]["message"]["content"]
 
94
  if response_content:
95
+ try:
96
+ json_string = response_content[response_content.find("{"): response_content.rfind("}") + 1]
97
+ json_data = json.loads(json_string)
98
+ if "data" in json_data and json_data["data"]:
99
+ return json_data["data"]
100
+ else:
101
+ logging.error(f"No records found in page/file: {page_identifier}")
102
+ error_pages.append(page_identifier)
103
+ return []
104
+ except json.JSONDecodeError:
105
+ logging.error(f"JSON parsing error in response for page/file {page_identifier}")
106
  error_pages.append(page_identifier)
107
  return []
108
  else:
109
+ logging.error(f"No content in JSON response for page/file {page_identifier}")
110
  error_pages.append(page_identifier)
111
  return []
112
  else:
113
+ logging.error(f"Error in API call for page/file {page_identifier}: HTTP {response.status_code} - {response.text}")
114
  error_pages.append(page_identifier)
115
  return []
116
+ except requests.exceptions.RequestException as e:
117
+ logging.error(f"Network or API error when processing page/file {page_identifier}: {e}")
118
  error_pages.append(page_identifier)
119
  return []
120