Spaces:

cogcorp
/

assignment1

Sleeping

App Files Files Community

assignment1 / app.py

cogcorp

Update app.py

5672db4 about 2 years ago

raw

history blame

2.8 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	import zipfile
	import os
	import io
	import nltk
	import openai
	import time


	import pip
	import subprocess
	import sys

	# Install everything listed in requirements.txt with the interpreter that is
	# running this script; check_call raises if pip exits with a non-zero status.
	_pip_install_cmd = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
	subprocess.check_call(_pip_install_cmd)

	# Fetch the NLTK data used by the app: 'punkt' first, then the 'all' collection.
	for _nltk_package in ('punkt', 'all'):
	    nltk.download(_nltk_package)








	# The OpenAI API key is not hard-coded here: it is read from the environment
	# variable named 'OpenAPI'. os.getenv returns None when that variable is
	# unset, in which case api_key is assigned None.
	openai.api_key = os.getenv('OpenAPI')

	def call_openai_api(prompt):
	    """Send *prompt* to the gpt-3.5-turbo chat model and return its reply text.

	    The request is tried up to three times, pausing one second after each
	    failed try. If the final try also raises, the exception's message is
	    returned as a string rather than propagated to the caller.
	    """
	    attempts_left = 3
	    while attempts_left > 0:
	        attempts_left -= 1
	        try:
	            completion = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=[
	                    {"role": "system", "content": "You are a helpful assistant."},
	                    {"role": "user", "content": prompt},
	                ],
	            )
	            # Indexing stays inside the try so a malformed reply is retried too.
	            return completion['choices'][0]['message']['content']
	        except Exception as error:
	            if attempts_left == 0:
	                # Out of retries: hand the error text back to the caller.
	                return str(error)
	            time.sleep(1)

	def pdf_to_text(file, user_prompt):
	    """Run the text of every PDF inside an uploaded zip archive through the chat model.

	    Args:
	        file: Uploaded-file object; only its ``name`` attribute (the path of
	            the zip archive on disk) is read.
	        user_prompt: Instruction text entered in the UI. When non-empty it is
	            placed in front of each piece of document text sent to the model;
	            when empty or None the document text is sent on its own.

	    Returns:
	        The replies from ``call_openai_api``, one per request, joined with
	        newlines. An archive without any ``.pdf`` members yields ``''``.
	    """
	    tokens_per_request = 2000
	    instruction = (user_prompt or '').strip()

	    def ask(document_text):
	        # Prefix the user's instruction so it actually guides the reply.
	        if instruction:
	            return call_openai_api(f"{instruction}\n\n{document_text}")
	        return call_openai_api(document_text)

	    replies = []
	    # The context manager closes the archive even if a PDF fails to parse.
	    with zipfile.ZipFile(file.name, 'r') as archive:
	        for member in archive.namelist():
	            # Match the extension case-insensitively so '.PDF' is not skipped.
	            if not member.lower().endswith('.pdf'):
	                continue
	            reader = PdfReader(io.BytesIO(archive.read(member)))
	            # `or ''` keeps the join safe should a page return None instead
	            # of a string.
	            text = ''.join(page.extract_text() or '' for page in reader.pages)
	            tokens = nltk.word_tokenize(text)
	            if len(tokens) > tokens_per_request:
	                # Long document: send it in consecutive token windows.
	                for start in range(0, len(tokens), tokens_per_request):
	                    window = tokens[start:start + tokens_per_request]
	                    replies.append(ask(' '.join(window)))
	            else:
	                # Short document: send the extracted text unchanged.
	                replies.append(ask(text))
	    return '\n'.join(replies)

	# Build the UI components first (same creation order as they are used),
	# then wire them to pdf_to_text and start the app.
	# NOTE(review): the gr.inputs / gr.outputs namespaces appear to be deprecated
	# in newer Gradio releases — confirm against the pinned Gradio version.
	_zip_upload = gr.inputs.File(label="PDF File (Upload a Zip file containing ONLY PDF files)")
	_prompt_box = gr.inputs.Textbox(label="User Prompt (Enter a prompt to guide the AI's responses)")
	_answer_box = gr.outputs.Textbox(label="Cognitive Agent Response")

	iface = gr.Interface(
	    fn=pdf_to_text,
	    inputs=[_zip_upload, _prompt_box],
	    outputs=_answer_box,
	    title="PDF Text Extractor",
	    description="This app extracts knowledge from the uploaded Zip files. Using a Cognitive Agent you can interact with that knowledge.",
	)

	# share=False: no public share link is requested.
	iface.launch(share=False)