Spaces:

danial0203
/

TextBookReviewer

Sleeping

App Files Files Community

TextBookReviewer / app.py

danial0203

Update app.py

8cd9426 verified over 1 year ago

raw

history blame

3.19 kB

	from pdf2image import convert_from_path
	from io import BytesIO
	import base64
	import requests
	import gradio as gr
	import os


	# Install the poppler command-line tools at start-up (presumably required by
	# pdf2image for PDF rendering - confirm against its documentation).
	# The package index is refreshed first so the install can resolve the package.
	os.system("apt-get update")
	# `-y` answers the confirmation prompt automatically; without it apt-get
	# aborts when no interactive terminal is attached.
	os.system("apt-get install -y poppler-utils")

	def convert_pdf_to_images(pdf_file):
	    """Render the PDF located at ``pdf_file`` into images.

	    Thin wrapper around ``pdf2image.convert_from_path``; the result of that
	    call is returned unchanged (the caller iterates it page by page).
	    """
	    rendered_pages = convert_from_path(pdf_file)
	    return rendered_pages

	def encode_image_to_base64(image):
	    """Serialise an image as JPEG and return it as a base64 text string.

	    Args:
	        image: An image object exposing ``mode``, ``convert(mode)`` and
	            ``save(fp, format=...)`` (a Pillow image).

	    Returns:
	        The JPEG bytes of ``image`` encoded as base64, decoded to a UTF-8
	        ``str``.
	    """
	    # JPEG cannot store alpha channels or palettes; saving such an image
	    # raises OSError, so convert it to RGB first. Modes JPEG can write are
	    # left untouched so their output does not change.
	    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
	    if image.mode not in jpeg_writable_modes:
	        image = image.convert("RGB")

	    buffered = BytesIO()
	    image.save(buffered, format="JPEG")
	    return base64.b64encode(buffered.getvalue()).decode("utf-8")

	def process_pdf(pdf_file, user_prompt):
	    """Check each page of a PDF for text mistakes using the OpenAI vision model.

	    Every page is rendered to an image, base64-encoded as JPEG and posted to
	    the chat completions endpoint together with the review prompt. The API
	    key is read from the ``OPENAI_API_KEY`` environment variable.

	    Args:
	        pdf_file: Path of the uploaded PDF, or an object whose ``name``
	            attribute holds that path. ``None`` (no upload) is accepted.
	        user_prompt: Free text placed in front of the fixed review
	            instructions.

	    Returns:
	        A list of strings, one per page for which the API returned non-empty
	        content, with backticks, ``json `` markers, braces and double quotes
	        removed. Empty when no file was supplied.
	    """
	    # Nothing was uploaded: there are no pages to review.
	    if pdf_file is None:
	        return []

	    # Works for a plain path string as well as for wrappers exposing `.name`.
	    pdf_path = getattr(pdf_file, "name", pdf_file)
	    images = convert_pdf_to_images(pdf_path)

	    api_key = os.getenv('OPENAI_API_KEY')
	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}"
	    }

	    # Combine the predefined prompt with the user prompt. It does not depend
	    # on the page, so it is built once instead of on every iteration.
	    combined_prompt = f"""{user_prompt} REPLY ONLY IN JSON FORMAT mentioned below. Perform OCR and look for mistakes, that can be spelling mistakes, grammar mistakes, contexual errors and wrong definitions. The spellings are according to British English, don't change them to American. Do not provide OCR if there's no mistake on the page. The output you provide should be only of the images you find mistakes in. For example, at one point the definition of NAME is wrong, fix all the similar and any other mistakes you find. The changes you make to these mistakes should be described what the mistake was and why it had to changed. Make sure to Identify the page number and write it in your response as well. DO NOT MISS ANY WRONG DEFINITIONS, ALL THE DATA SHOULD BE FIXED. JSON Respsonse Format:
	{{
	"page_number": "page number",
	"original_text": "original text",
	"corrected_text": "corrected text",
	}}

	"""

	    content_responses = []
	    for image in images:
	        base64_image = encode_image_to_base64(image)

	        payload = {
	            "model": "gpt-4-vision-preview",
	            "messages": [
	                {
	                    "role": "user",
	                    "content": [
	                        {
	                            "type": "text",
	                            "text": combined_prompt
	                        },
	                        {
	                            "type": "image_url",
	                            "image_url": {
	                                "url": f"data:image/jpeg;base64,{base64_image}"
	                            }
	                        }
	                    ]
	                }
	            ],
	            "max_tokens": 300
	        }

	        # A timeout keeps a stalled connection from blocking the request forever.
	        response = requests.post(
	            "https://api.openai.com/v1/chat/completions",
	            headers=headers,
	            json=payload,
	            timeout=120,
	        ).json()

	        # A reply without usable "choices" (missing or empty list) is treated
	        # like an empty answer instead of raising IndexError.
	        choices = response.get('choices') or [{}]
	        response_content = choices[0].get('message', {}).get('content', '')
	        if response_content:
	            # Parse the content to remove unwanted characters
	            content_cleaned = response_content.replace("`", "").replace("json ", "").replace("{", "").replace("}", "").replace("\"", "")
	            content_responses.append(content_cleaned)

	    return content_responses



	# Gradio front end: a file upload plus a free-text prompt feed process_pdf,
	# and its return value is displayed as JSON.
	pdf_input = gr.File(type="filepath")
	prompt_input = gr.Textbox(label="Enter your custom prompt")
	iface = gr.Interface(fn=process_pdf, inputs=[pdf_input, prompt_input], outputs="json")

	# Turn on the request queue, then start the app without a public share link.
	queued_iface = iface.queue()
	queued_iface.launch(share=False)