Spaces:

cogcorp
/

assignment1

Sleeping

App Files Files Community

assignment1 / app.py

cogcorp

Update app.py

44f782b about 2 years ago

raw

history blame

2.32 kB

	import io
	import os
	import subprocess
	import sys
	import zipfile

	import gradio as gr
	import nltk
	import openai
	import pip
	from PyPDF2 import PdfReader

	# Install the packages listed in requirements.txt at startup.  Using
	# ``sys.executable`` makes pip run under the same interpreter as this
	# script; ``check_call`` raises CalledProcessError if the install fails.
	# NOTE(review): the third-party imports at the top of the file execute
	# before this install runs, so it cannot bootstrap those packages on a
	# clean environment -- confirm whether this step is still needed.
	subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

	# Download only the tokenizer data that nltk.word_tokenize needs, instead
	# of the complete 'all' collection (every corpus and model NLTK ships).
	# 'punkt' serves older NLTK releases; 'punkt_tab' is the resource newer
	# releases look up for the same tokenizer.
	nltk.download('punkt')
	nltk.download('punkt_tab')

	# The OpenAI API key is read from the environment variable named
	# "OpenAPI" (e.g. a secret configured for the deployment); do not
	# hard-code the key in this file.  os.getenv returns None when the
	# variable is unset.
	openai.api_key = os.getenv('OpenAPI')

	# Chat model used for every request, and the largest number of NLTK tokens
	# of document text sent in a single request.
	_CHAT_MODEL = "gpt-3.5-turbo"
	_MAX_TOKENS_PER_REQUEST = 2000


	def _extract_pdf_text(pdf_bytes):
	    """Return the concatenated text of every page of an in-memory PDF."""
	    reader = PdfReader(io.BytesIO(pdf_bytes))
	    # Treat a falsy extract_text() result (e.g. a page with no text layer)
	    # as an empty string so the join never sees a non-string value.
	    return ''.join(page.extract_text() or '' for page in reader.pages)


	def _split_for_model(text):
	    """Split *text* into pieces of at most _MAX_TOKENS_PER_REQUEST tokens.

	    A text that fits in one request is returned unchanged as a single
	    piece.  A longer text is tokenized with NLTK and each piece is its
	    tokens re-joined with single spaces.
	    """
	    tokens = nltk.word_tokenize(text)
	    if len(tokens) <= _MAX_TOKENS_PER_REQUEST:
	        return [text]
	    return [
	        ' '.join(tokens[start:start + _MAX_TOKENS_PER_REQUEST])
	        for start in range(0, len(tokens), _MAX_TOKENS_PER_REQUEST)
	    ]


	def _ask_model(user_prompt, document_text):
	    """Send the user's prompt plus one piece of text; return the reply text."""
	    response = openai.ChatCompletion.create(
	        model=_CHAT_MODEL,
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": user_prompt},
	            {"role": "user", "content": document_text},
	        ],
	    )
	    return response['choices'][0]['message']['content']


	def pdf_to_text(file, user_prompt):
	    """Apply *user_prompt* to every PDF inside a ZIP archive.

	    Each archive member whose name ends in ``.pdf`` (case-insensitive) is
	    read, its page text extracted, split into request-sized pieces, and
	    each piece is sent to the chat model together with *user_prompt*.

	    Args:
	        file: The ZIP archive.  Either an object exposing the archive's
	            filesystem path as ``.name`` (what the original code
	            required), a path given as ``str``/``os.PathLike``, or a
	            binary file-like object.
	        user_prompt: Instruction sent to the model ahead of each piece of
	            document text.

	    Returns:
	        The model replies, in archive order, joined by newlines.  An empty
	        string when the archive contains no PDF members.

	    Raises:
	        ValueError: If *file* is None (nothing was uploaded).
	        zipfile.BadZipFile: If *file* is not a valid ZIP archive.
	    """
	    if file is None:
	        raise ValueError("No ZIP archive was provided.")

	    # Path objects also have a ``.name`` attribute (the basename only), so
	    # they must be recognized before falling back to the ``.name`` lookup.
	    if isinstance(file, (str, os.PathLike)):
	        archive_source = file
	    else:
	        archive_source = getattr(file, 'name', file)

	    replies = []
	    # The context manager closes the archive even if extraction or a
	    # model request raises.
	    with zipfile.ZipFile(archive_source, 'r') as archive:
	        for member in archive.namelist():
	            if not member.lower().endswith('.pdf'):
	                continue
	            text = _extract_pdf_text(archive.read(member))
	            replies.extend(
	                _ask_model(user_prompt, piece)
	                for piece in _split_for_model(text)
	            )
	    return '\n'.join(replies)

	# Gradio UI: a file upload and a text box feed pdf_to_text, whose string
	# result is displayed as text.
	iface = gr.Interface(
	    fn=pdf_to_text,
	    inputs=["file", "text"],
	    outputs="text",
	)
	# Start the web server; share=False requests no public share link.
	iface.launch(share=False)