Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -54,7 +54,13 @@ def process_files_fixed(image_path, page_identifier, error_pages):
|
|
54 |
error_pages.append(page_identifier)
|
55 |
return []
|
56 |
|
57 |
-
prompt = """Perform OCR on this image. Analyze the table in the provided image, focusing on TOTAL OF ELEVEN COLUMNS labeled S.No,
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
Return the response in the following JSON response format:
|
59 |
{
|
60 |
"data": [
|
@@ -71,9 +77,28 @@ def process_files_fixed(image_path, page_identifier, error_pages):
|
|
71 |
"S.CNIC": "61101-8018797-4",
|
72 |
"M.Name": "Nasira"
|
73 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
...
|
75 |
]
|
76 |
-
}
|
|
|
|
|
|
|
|
|
77 |
|
78 |
payload = {
|
79 |
"model": "gpt-4-vision-preview",
|
@@ -99,11 +124,12 @@ def process_files_fixed(image_path, page_identifier, error_pages):
|
|
99 |
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
|
100 |
logging.info(f"Full API response: {response.text}")
|
101 |
if response.status_code == 200:
|
102 |
-
|
103 |
-
|
104 |
-
if
|
105 |
try:
|
106 |
-
|
|
|
107 |
if "data" in json_data and json_data["data"]:
|
108 |
return json_data["data"]
|
109 |
else:
|
@@ -115,7 +141,7 @@ def process_files_fixed(image_path, page_identifier, error_pages):
|
|
115 |
error_pages.append(page_identifier)
|
116 |
return []
|
117 |
else:
|
118 |
-
logging.error(f"No
|
119 |
error_pages.append(page_identifier)
|
120 |
return []
|
121 |
else:
|
@@ -127,6 +153,8 @@ def process_files_fixed(image_path, page_identifier, error_pages):
|
|
127 |
error_pages.append(page_identifier)
|
128 |
return []
|
129 |
|
|
|
|
|
130 |
def process_pdf_and_generate_csv(file_path):
|
131 |
error_pages = [] # Initialize the list to track error pages or files
|
132 |
images = get_images(file_path)
|
|
|
54 |
error_pages.append(page_identifier)
|
55 |
return []
|
56 |
|
57 |
+
prompt = """Perform OCR on this image. Analyze the table in the provided image, focusing on TOTAL OF ELEVEN COLUMNS labeled S.No,
|
58 |
+
Admission No., Date of Admission, Name of Student, Father's Name, Date of Birth, Telephone No., Address, F.CNIC, S.CNIC and M.Name.
|
59 |
+
Get the Telephone No. from the last column, ignore office and residence column under it and write them.
|
60 |
+
For F.CNIC, S.CNIC and M.Name you will find this under REMARKS column. I don't want any mistakes in the obtained data.
|
61 |
+
In case the table headers are not visible or not present, assume the mentioned order for the columns.
|
62 |
+
Extract and list the data only from these columns, omitting any additional columns that may be present. But DO NOT skip any row from the table, extract all the rows present in the table.
|
63 |
+
I REPEAT, DO NOT SKIP ANY ROW FROM THE TABLES OR ANY COLUMNS AS MENTIONED. AND GIVE THE RESPONSE IN THE PROPER JSON FORMAT AS MENTIONED
|
64 |
Return the response in the following JSON response format:
|
65 |
{
|
66 |
"data": [
|
|
|
77 |
"S.CNIC": "61101-8018797-4",
|
78 |
"M.Name": "Nasira"
|
79 |
},
|
80 |
+
|
81 |
+
{
|
82 |
+
"S_No": "2",
|
83 |
+
"Admission No.": "1640",
|
84 |
+
"Date of Admission": "05-10-20",
|
85 |
+
"Name of Student": "Areej Jibran",
|
86 |
+
"Father's Name": "M.Jibran",
|
87 |
+
"Date of Birth": "05-04-14",
|
88 |
+
"Telephone No.": "03335173534",
|
89 |
+
"Address": "H#65 st#11 G11/I isb",
|
90 |
+
"F. CNIC": "37405-0393951-3",
|
91 |
+
"S.CNIC": "37405-5642572-3",
|
92 |
+
"M.Name": "Taqdees Jibran"
|
93 |
+
|
94 |
+
}
|
95 |
...
|
96 |
]
|
97 |
+
}
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
+
"""
|
102 |
|
103 |
payload = {
|
104 |
"model": "gpt-4-vision-preview",
|
|
|
124 |
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
|
125 |
logging.info(f"Full API response: {response.text}")
|
126 |
if response.status_code == 200:
|
127 |
+
json_response = response.json()
|
128 |
+
response_content = json_response["choices"][0]["message"]["content"]
|
129 |
+
if response_content:
|
130 |
try:
|
131 |
+
json_string = response_content[response_content.find("{"): response_content.rfind("}") + 1]
|
132 |
+
json_data = json.loads(json_string)
|
133 |
if "data" in json_data and json_data["data"]:
|
134 |
return json_data["data"]
|
135 |
else:
|
|
|
141 |
error_pages.append(page_identifier)
|
142 |
return []
|
143 |
else:
|
144 |
+
logging.error(f"No content in JSON response for page/file {page_identifier}")
|
145 |
error_pages.append(page_identifier)
|
146 |
return []
|
147 |
else:
|
|
|
153 |
error_pages.append(page_identifier)
|
154 |
return []
|
155 |
|
156 |
+
|
157 |
+
|
158 |
def process_pdf_and_generate_csv(file_path):
|
159 |
error_pages = [] # Initialize the list to track error pages or files
|
160 |
images = get_images(file_path)
|