assignment1 / app.py
cogcorp's picture
Update app.py
593d86e
raw
history blame
2.49 kB
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
# The OpenAI API key is not hard-coded: it is looked up in the process
# environment under the variable name 'OpenAPI' (None when the variable is unset).
openai.api_key = os.environ.get('OpenAPI')
def call_openai_api(prompt, max_retries=3, retry_delay=1, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the model's reply text.

    The request is retried when any exception is raised, waiting
    ``retry_delay`` seconds between consecutive attempts.

    Args:
        prompt: Text sent as the ``user`` message.
        max_retries: Total number of attempts. Values below 1 are treated
            as 1 so the function always makes at least one request.
        retry_delay: Seconds to sleep between attempts.
        model: Chat model name passed to the API.

    Returns:
        The ``content`` of the first choice on success. If every attempt
        fails, the string form of the last exception is returned instead of
        raising, so callers always receive a ``str``.

    NOTE(review): ``openai.ChatCompletion`` is the legacy (pre-1.0) interface
    of the ``openai`` package -- confirm the pinned package version.
    """
    attempts = max(1, max_retries)
    last_error = None
    for attempt in range(attempts):
        # Pause only *between* attempts: never before the first one and
        # never after the last one.
        if attempt and retry_delay > 0:
            time.sleep(retry_delay)
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary for the UI,
            # and the failure text is surfaced to the user after the final
            # attempt rather than crashing the request.
            last_error = e
    return str(last_error)
# Maximum number of NLTK tokens sent to the model in a single request.
_MAX_TOKENS_PER_CHUNK = 2000


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of an in-memory PDF."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # Defensive `or ''`: a page that yields no text must not break the join.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def _with_user_prompt(user_prompt, content):
    """Prefix ``content`` with the user's instructions, if any were given."""
    instructions = (user_prompt or '').strip()
    if not instructions:
        return content
    return instructions + "\n\n" + content


def pdf_to_text(file, user_prompt):
    """Run every PDF inside an uploaded zip archive through the OpenAI API.

    For each archive member whose name ends in ``.pdf`` (case-insensitive),
    the text of all pages is extracted and tokenized with
    ``nltk.word_tokenize``. Documents longer than ``_MAX_TOKENS_PER_CHUNK``
    tokens are split into chunks of at most that many tokens (re-joined with
    single spaces); shorter documents are sent as their original text. Each
    piece is sent to ``call_openai_api`` prefixed by ``user_prompt``.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path to
            the zip archive) is used.
        user_prompt: Instructions placed in front of every piece of document
            text. Empty or ``None`` sends the document text on its own.

    Returns:
        All API responses joined by newlines, in archive order. An archive
        with no PDFs (or only PDFs without extractable text) yields ``''``.

    NOTE(review): ``nltk.word_tokenize`` needs the NLTK tokenizer data to be
    installed; nothing in this file downloads it -- confirm the deployment
    environment provides it.
    """
    responses = []
    # The context manager guarantees the archive handle is closed even when
    # PDF parsing or tokenization raises.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for member in archive.namelist():
            if not member.lower().endswith('.pdf'):
                continue

            text = _extract_pdf_text(archive.read(member))
            if not text.strip():
                # Nothing extractable (e.g. an image-only PDF): skip rather
                # than spend an API call on an empty document.
                continue

            tokens = nltk.word_tokenize(text)
            if len(tokens) > _MAX_TOKENS_PER_CHUNK:
                pieces = [
                    ' '.join(tokens[start:start + _MAX_TOKENS_PER_CHUNK])
                    for start in range(0, len(tokens), _MAX_TOKENS_PER_CHUNK)
                ]
            else:
                pieces = [text]

            for piece in pieces:
                responses.append(
                    call_openai_api(_with_user_prompt(user_prompt, piece))
                )
    return '\n'.join(responses)
# Gradio front end: a zip upload and a free-text prompt are the two inputs
# handed (in that order) to pdf_to_text; its returned string is displayed in
# a single output text box.
iface = gr.Interface(
    title="PDF Text Extractor",
    description="This app extracts knowledge from the uploaded Zip files. Using a Cognitive Agent you can interact with that knowledge.",
    fn=pdf_to_text,
    inputs=[
        gr.inputs.File(label="PDF File (Upload a Zip file containing ONLY PDF files)"),
        gr.inputs.Textbox(label="User Prompt (Enter a prompt to guide the AI's responses)"),
    ],
    outputs=gr.outputs.Textbox(label="Cognitive Agent Response"),
)

# Start serving the interface; sharing is explicitly turned off.
iface.launch(share=False)