# assignment1 / app.py
# cogcorp's picture
# Update app.py
# fd8442d
# raw
# history blame
# 2.07 kB
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
# The OpenAI API key is not hard-coded: it is read from the environment
# variable named 'OpenAPI' (the value is None when that variable is unset).
openai.api_key = os.environ.get('OpenAPI')
# Maximum number of NLTK tokens sent to the model in a single request.
_MAX_TOKENS_PER_REQUEST = 2000
# Chat model used for every request.
_CHAT_MODEL = "gpt-3.5-turbo"


def _ask_model(user_prompt, content):
    """Send the user's prompt plus one piece of document text; return the reply text."""
    # NOTE: openai.ChatCompletion is the pre-1.0 interface of the openai
    # package; this file is written against that interface.
    response = openai.ChatCompletion.create(
        model=_CHAT_MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
            {"role": "user", "content": content},
        ],
    )
    return response['choices'][0]['message']['content']


def _split_for_model(text):
    """Return the pieces of ``text`` to send to the model, one per request.

    Text with at most ``_MAX_TOKENS_PER_REQUEST`` tokens is returned
    unchanged as a single piece. Longer text is cut into consecutive runs
    of at most that many tokens, each re-joined with single spaces.
    """
    # NOTE(review): nltk.word_tokenize needs NLTK tokenizer data to be
    # available at runtime; nothing in this file downloads it - confirm
    # the deployment provides it.
    tokens = nltk.word_tokenize(text)
    if len(tokens) <= _MAX_TOKENS_PER_REQUEST:
        return [text]
    return [
        ' '.join(tokens[start:start + _MAX_TOKENS_PER_REQUEST])
        for start in range(0, len(tokens), _MAX_TOKENS_PER_REQUEST)
    ]


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of an in-memory PDF."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # `or ''` guards against a page whose extraction yields nothing.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def pdf_to_text(file, user_prompt):
    """Apply ``user_prompt`` to every PDF inside a ZIP archive via the chat model.

    Args:
        file: The uploaded ZIP archive. Either an object whose ``name``
            attribute is the archive's path on disk, or the path itself
            (``str`` or ``os.PathLike``).
        user_prompt: Instruction sent to the model ahead of each piece of
            document text.

    Returns:
        The model's replies, one per request, joined with newlines. The
        result is an empty string when the archive contains no PDF members.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was uploaded).
    """
    if file is None:
        raise ValueError("No ZIP archive was provided.")

    # Path-like inputs are used directly; pathlib.Path also has a `.name`
    # attribute (the basename), so it must not go through the `.name` branch.
    if isinstance(file, (str, os.PathLike)):
        archive_path = file
    else:
        archive_path = file.name

    replies = []
    # The context manager closes the archive even if a PDF fails to parse
    # or a model request raises.
    with zipfile.ZipFile(archive_path, 'r') as archive:
        for member in archive.namelist():
            # Match the extension case-insensitively so "REPORT.PDF" is included.
            if not member.lower().endswith('.pdf'):
                continue
            text = _extract_pdf_text(archive.read(member))
            for piece in _split_for_model(text):
                replies.append(_ask_model(user_prompt, piece))
    return '\n'.join(replies)
# Web UI: a file upload and a text box feed pdf_to_text; its return value
# is shown as text.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=["file", "text"],
    outputs="text",
)
# Start the app without requesting a public share link.
iface.launch(share=False)