assignment1 / app.py
cogcorp's picture
Update app.py
954c6c1
raw
history blame
3.49 kB
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
import pip
import subprocess
import sys
# Install the packages listed in requirements.txt (path is relative to the
# current working directory) using the interpreter that is running this script.
# check_call raises subprocess.CalledProcessError if pip exits with a non-zero
# status, which aborts start-up.
# NOTE(review): the imports at the top of this file have already executed by the
# time this line runs, so this step cannot supply a package those imports need.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
# Download the NLTK 'punkt' data package at start-up.
# Presumably this is what nltk.word_tokenize (used in pdf_to_text) relies on --
# confirm against the NLTK version pinned in requirements.txt.
nltk.download('punkt')
# Read the OpenAI API key from the environment variable named 'OpenAPI'.
# os.getenv returns None when that variable is not set, so the key may be None
# here; nothing on this line validates it.
openai.api_key = os.getenv('OpenAPI')
def create_persona(text):
    """Ask gpt-3.5-turbo to write a persona description from ``text``.

    The chat request is attempted up to five times, pausing one second
    between attempts.

    Args:
        text: Source material that is embedded in the user message.

    Returns:
        The content of the first choice in the chat completion on success.
        If every attempt raises, the string form of the exception raised by
        the final attempt is returned instead (no exception propagates).
    """
    attempts_allowed = 5
    final_attempt = attempts_allowed - 1
    attempt_no = 0
    while attempt_no < attempts_allowed:
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a knowledgeable assistant."},
                    {"role": "user", "content": f"Create a persona based on this text: {text}"},
                ],
            )
            return reply['choices'][0]['message']['content']
        except Exception as err:
            # Out of attempts: hand the error text back to the caller.
            if attempt_no == final_attempt:
                return str(err)
            # Otherwise wait one second and try again.
            time.sleep(1)
        attempt_no += 1
def call_openai_api(persona, user_prompt):
    """Send ``persona`` followed by ``user_prompt`` to gpt-3.5-turbo.

    The request is attempted up to five times, sleeping one second between
    attempts.

    Args:
        persona: Persona text placed at the start of the user message.
        user_prompt: The user's question, appended directly after ``persona``.

    Returns:
        The content of the first choice in the chat completion on success.
        If every attempt raises, the string form of the last exception is
        returned instead, so the caller always receives a string to display.
    """
    max_retries = 5
    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a knowledgeable assistant that provides short factual answers"},
                    # NOTE(review): persona and prompt are joined with no
                    # separator; kept as-is to preserve the prompt that is sent.
                    {"role": "user", "content": f"{persona}{user_prompt}"},
                ],
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: any failure (network, API, unexpected
            # response shape) is retried and finally reported as text.
            if attempt == max_retries - 1:
                return str(e)  # last attempt failed: return the error message
            time.sleep(1)  # wait one second before retrying
def pdf_to_text(file, user_prompt):
    """Build a persona from the PDFs in an uploaded zip and answer a prompt.

    Every archive member whose name ends in ``.pdf`` (case-insensitive) is
    read into memory and its page text extracted. The combined text is
    tokenized, truncated to the first 4096 tokens, turned into a persona via
    ``create_persona``, and that persona is passed with ``user_prompt`` to
    ``call_openai_api``.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a path to the
            zip archive on disk) is used.
        user_prompt: The question to put to the generated persona.

    Returns:
        Whatever ``call_openai_api`` returns for the persona and prompt.

    Raises:
        zipfile.BadZipFile: If ``file.name`` is not a valid zip archive.
    """
    max_tokens = 4096
    page_texts = []
    # The context manager closes the archive even if a PDF fails to parse.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for member in archive.namelist():
            if not member.lower().endswith('.pdf'):
                continue
            reader = PdfReader(io.BytesIO(archive.read(member)))
            for page in reader.pages:
                # Guard against an empty/None extraction result so the join
                # below cannot fail on a page without extractable text.
                page_texts.append(page.extract_text() or '')
    # Join with newlines so the last word of one page does not fuse with the
    # first word of the next before tokenizing.
    aggregated_text = '\n'.join(page_texts)
    # Only the first max_tokens tokens are kept; the rest are discarded
    # (the text is truncated, not split into multiple chunks).
    tokens = nltk.word_tokenize(aggregated_text)[:max_tokens]
    # Create a single persona from all retained text.
    persona = create_persona(' '.join(tokens))
    return call_openai_api(persona, user_prompt)
# Input widgets, listed in the order pdf_to_text takes its arguments.
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces -- confirm they exist in the Gradio version this app is pinned to.
zip_upload = gr.inputs.File(label="PDF File (Upload a Zip file containing ONLY PDF files)")
prompt_box = gr.inputs.Textbox(label="User Prompt (Enter a prompt to interact with your persona)")
# Single text output that shows the value returned by pdf_to_text.
response_box = gr.outputs.Textbox(label="Cognitive Agent Response")

iface = gr.Interface(
    fn=pdf_to_text,
    inputs=[zip_upload, prompt_box],
    outputs=response_box,
    title="Ask An Expert Proof Of Concept",
    description="This app extracts knowledge from the uploaded Zip files. The Cognitive Agent will use this data to build your unique persona.",
)
# Start the web UI without requesting a public share link.
iface.launch(share=False)