Spaces:
Sleeping
Sleeping
File size: 2,074 Bytes
5f07ce9 47125e3 0b87fda 5f07ce9 fd8442d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
# Put your OpenAI API key here
openai.api_key = os.getenv('OpenAPI')
def pdf_to_text(file, user_prompt):
z = zipfile.ZipFile(file.name, 'r')
texts = []
for filename in z.namelist():
if filename.endswith('.pdf'):
pdf_file_data = z.read(filename)
pdf_file_io = io.BytesIO(pdf_file_data)
pdf = PdfReader(pdf_file_io)
text = ''
for page in pdf.pages:
text += page.extract_text()
# Tokenize text
tokens = nltk.word_tokenize(text)
# If tokens are more than 2000, split into chunks
if len(tokens) > 2000:
for i in range(0, len(tokens), 2000):
chunk = tokens[i:i + 2000]
chunk_str = ' '.join(chunk)
# Using OpenAI API
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": user_prompt},
{"role": "user", "content": chunk_str},
]
)
texts.append(response['choices'][0]['message']['content'])
else:
# Using OpenAI API
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": user_prompt},
{"role": "user", "content": text},
]
)
texts.append(response['choices'][0]['message']['content'])
return '\n'.join(texts)
iface = gr.Interface(fn=pdf_to_text, inputs=["file", "text"], outputs="text")
iface.launch(share=False)
|