# assignment1 / app.py
# Page header captured together with the file (not part of the program):
#   cogcorp's picture | Update app.py | 44f782b | raw | history blame | 2.32 kB
import io
import os
import subprocess
import sys
import zipfile

import gradio as gr
import nltk
import openai
import pip
from PyPDF2 import PdfReader
# Install the packages listed in requirements.txt using the interpreter that
# is running this script (sys.executable), so they land in the environment
# the app itself runs in. check_call raises CalledProcessError if pip fails.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

# Download only the NLTK resources that nltk.word_tokenize relies on, rather
# than the complete 'all' collection, which is very large and would be
# re-fetched on every cold start. Recent NLTK releases load the tokenizer
# from 'punkt_tab', older ones from 'punkt', so both are requested;
# nltk.download reports (rather than raises) when an identifier is unknown.
for _nltk_package in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_package)

# The OpenAI API key is read from the environment variable named 'OpenAPI'.
# os.getenv returns None when the variable is unset, in which case the key
# stays unset here.
openai.api_key = os.getenv('OpenAPI')
# Maximum number of NLTK tokens sent to the model in a single request.
_MAX_TOKENS_PER_REQUEST = 2000


def _ask_model(user_prompt, content):
    """Send *user_prompt* followed by *content* to the chat model.

    Both strings are passed as separate user messages after a fixed system
    message. Returns the text of the first choice in the response.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
            {"role": "user", "content": content},
        ],
    )
    return response['choices'][0]['message']['content']


def pdf_to_text(file, user_prompt):
    """Run *user_prompt* against every PDF inside a ZIP archive.

    Args:
        file: Object whose ``name`` attribute is the path of a ZIP archive.
        user_prompt: Instruction sent to the model ahead of each document
            (or document chunk).

    Returns:
        The model replies joined with newlines, in archive order. A PDF with
        more than ``_MAX_TOKENS_PER_REQUEST`` tokens contributes one reply
        per chunk. An archive without PDF members yields an empty string.

    Raises:
        zipfile.BadZipFile: If ``file.name`` is not a valid ZIP archive.
    """
    texts = []
    # The context manager closes the archive handle even if parsing a PDF or
    # calling the API raises.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for filename in archive.namelist():
            # Match the extension case-insensitively so "REPORT.PDF" is not
            # silently skipped.
            if not filename.lower().endswith('.pdf'):
                continue

            pdf = PdfReader(io.BytesIO(archive.read(filename)))
            # `or ''` guards against a page whose extraction yields None
            # instead of a string; join avoids repeated string concatenation.
            text = ''.join(page.extract_text() or '' for page in pdf.pages)

            tokens = nltk.word_tokenize(text)
            if len(tokens) <= _MAX_TOKENS_PER_REQUEST:
                # Short documents are sent as the original extracted text.
                texts.append(_ask_model(user_prompt, text))
                continue

            # Long documents are split into fixed-size token windows, each
            # re-joined with single spaces and sent as its own request.
            for start in range(0, len(tokens), _MAX_TOKENS_PER_REQUEST):
                chunk_str = ' '.join(tokens[start:start + _MAX_TOKENS_PER_REQUEST])
                texts.append(_ask_model(user_prompt, chunk_str))
    return '\n'.join(texts)
# Web UI: a file upload plus a text box feed pdf_to_text, and the returned
# string is shown as text output.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=["file", "text"],
    outputs="text",
)
# Start the Gradio server without requesting a public share link.
iface.launch(share=False)