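# Scraped Hugging Face file-view header for app.py (not part of the program):
#   avatar alt text: danial0203's picture
#   commit message:  Update app.py
#   commit:          8cd9426 (verified)
#   page controls:   raw / history / blame
#   file size:       3.19 kB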
from pdf2image import convert_from_path
from io import BytesIO
import base64
import requests
import gradio as gr
import os
# pdf2image shells out to poppler's command-line tools, so make sure they are
# installed before any PDF is converted.
os.system("apt-get update")
# "-y" answers the confirmation prompt automatically; without it apt-get aborts
# when there is no interactive terminal and poppler is never installed.
os.system("apt-get install -y poppler-utils")
def convert_pdf_to_images(pdf_file):
    """Render the PDF located at ``pdf_file`` into images.

    Thin wrapper around ``pdf2image.convert_from_path``; the result of that
    call is returned unchanged (presumably one image per page -- the caller
    iterates over it page by page).
    """
    rendered_pages = convert_from_path(pdf_file)
    return rendered_pages
def encode_image_to_base64(image):
    """Return ``image`` encoded as JPEG and then as a base64 text string.

    ``image`` must provide a ``save(fp, format=...)`` method; the JPEG bytes
    are written to an in-memory buffer, never to disk.
    """
    with BytesIO() as jpeg_buffer:
        image.save(jpeg_buffer, format="JPEG")
        jpeg_bytes = jpeg_buffer.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode("utf-8")
def process_pdf(pdf_file, user_prompt):
    """Proofread every page of a PDF with the OpenAI chat completions API.

    Each page is rendered to an image, base64-encoded as JPEG and sent together
    with the combined prompt to the ``gpt-4-vision-preview`` model.

    Args:
        pdf_file: Path to the PDF as a string, or an object whose ``name``
            attribute holds that path. ``None`` (nothing uploaded) is accepted.
        user_prompt: Free-form instructions prepended to the built-in
            proofreading prompt.

    Returns:
        A list of cleaned response strings, one for every page for which the
        API returned non-empty content. Empty when ``pdf_file`` is ``None``.

    Raises:
        requests.RequestException: On network failure or when a request takes
            longer than the timeout.
    """
    if pdf_file is None:
        # Nothing was uploaded, so there are no pages to review.
        return []

    # Accept both a plain path string and a file-like object exposing ``.name``.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    images = convert_pdf_to_images(pdf_path)

    api_key = os.getenv('OPENAI_API_KEY')
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # The prompt does not depend on the page, so build it once outside the loop.
    # Continuation lines stay at column 0 so the text sent to the API is unchanged.
    combined_prompt = f"""{user_prompt} REPLY ONLY IN JSON FORMAT mentioned below. Perform OCR and look for mistakes, that can be spelling mistakes, grammar mistakes, contexual errors and wrong definitions. The spellings are according to British English, don't change them to American. Do not provide OCR if there's no mistake on the page. The output you provide should be only of the images you find mistakes in. For example, at one point the definition of NAME is wrong, fix all the similar and any other mistakes you find. The changes you make to these mistakes should be described what the mistake was and why it had to changed. Make sure to Identify the page number and write it in your response as well. DO NOT MISS ANY WRONG DEFINITIONS, ALL THE DATA SHOULD BE FIXED. JSON Respsonse Format:
{{
"page_number": "page number",
"original_text": "original text",
"corrected_text": "corrected text",
}}
"""

    content_responses = []
    for image in images:
        base64_image = encode_image_to_base64(image)
        payload = {
            "model": "gpt-4-vision-preview",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": combined_prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": 300
        }
        # A timeout keeps one stalled request from blocking the app forever.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        ).json()

        # Error payloads have no "choices", and the list may also be empty;
        # fall back to an empty choice so the lookup below never raises.
        choices = response.get('choices') or [{}]
        response_content = choices[0].get('message', {}).get('content', '')
        if response_content:
            # Strip backticks, the "json " marker, braces and double quotes so
            # only the bare key/value text of the reply remains.
            content_cleaned = (
                response_content
                .replace("`", "")
                .replace("json ", "")
                .replace("{", "")
                .replace("}", "")
                .replace("\"", "")
            )
            content_responses.append(content_cleaned)
    return content_responses
# Gradio front end: a file upload plus a free-text prompt go into process_pdf,
# and its return value is displayed as JSON.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(type="filepath"),
        gr.Textbox(label="Enter your custom prompt"),
    ],
    outputs="json",
)
# Turn on request queuing, then start the server without a public share link.
iface.queue()
iface.launch(share=False)