Spaces:

cogcorp
/

assignment1

Sleeping

App Files Files Community

assignment1 / app.py

cogcorp

Update app.py

d48fb2c about 2 years ago

raw

history blame

3.95 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	import zipfile
	import os
	import io
	import nltk
	import openai
	import time
	import pip
	import subprocess
	import sys






	# Install everything listed in requirements.txt, using the pip that belongs
	# to the interpreter currently running this script.
	# NOTE(review): the third-party imports at the top of the file execute
	# before this install step runs.
	_pip_install_cmd = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
	subprocess.check_call(_pip_install_cmd)

	# Fetch the 'punkt' NLTK data package.
	nltk.download('punkt')

	# The OpenAI API key is read from the environment variable named 'OpenAPI'
	# (None when that variable is not set).
	openai.api_key = os.environ.get('OpenAPI')

	def create_persona(text):
	    """Ask the chat model to write a persona based on ``text``.

	    The request is attempted up to five times, sleeping one second after
	    each failed attempt except the last.

	    Returns:
	        The ``content`` of the first choice's message on success. If the
	        final attempt also raises, the string form of that exception is
	        returned instead of the exception being propagated.
	    """
	    total_attempts = 5
	    final_attempt = total_attempts - 1
	    for attempt_index in range(total_attempts):
	        try:
	            system_msg = {"role": "system", "content": "You are an expert at summarizing content to provide a factual persona."}
	            user_msg = {"role": "user", "content": f"Create a persona based on this text: {text}"}
	            completion = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=[system_msg, user_msg],
	            )
	            first_choice = completion['choices'][0]
	            return first_choice['message']['content']
	        except Exception as error:
	            if attempt_index == final_attempt:
	                # Retries exhausted: hand the error text back to the caller.
	                return str(error)
	            # Short pause, then let the loop make the next attempt.
	            time.sleep(1)

	def call_openai_api(persona, user_prompt):
	    """Send ``user_prompt`` to the chat model while it plays ``persona``.

	    ``persona`` is placed both in the system message and in the user
	    message that precedes ``user_prompt``. The request is attempted up to
	    five times, sleeping one second after each failed attempt except the
	    last.

	    Returns:
	        The ``content`` of the first choice's message on success. If the
	        final attempt also raises, the string form of that exception is
	        returned instead of the exception being propagated.
	    """
	    total_attempts = 5
	    final_attempt = total_attempts - 1
	    for attempt_index in range(total_attempts):
	        try:
	            system_msg = {"role": "system", "content": f"You are {persona}"}
	            # The continuation lines of this literal are part of the prompt
	            # text and must stay exactly as they are.
	            user_msg = {"role": "user", "content": f"""Ignore all previous instructions. As a Cognitive AI Agent your persona is:{persona}
	You will answer only as an expert within your persona.
	All answers must relate to your persona. {user_prompt}"""}
	            completion = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=[system_msg, user_msg],
	            )
	            first_choice = completion['choices'][0]
	            return first_choice['message']['content']
	        except Exception as error:
	            if attempt_index == final_attempt:
	                # Retries exhausted: hand the error text back to the caller.
	                return str(error)
	            # Short pause, then let the loop make the next attempt.
	            time.sleep(1)



	def pdf_to_text(file, user_prompt):
	    """Answer ``user_prompt`` with a persona built from a zip of PDF files.

	    Steps:
	      1. Open the zip archive at ``file.name`` and concatenate the text
	         extracted from every page of every entry whose name ends in
	         ``.pdf`` (matched case-insensitively).
	      2. Tokenize that text with ``nltk.word_tokenize``. While more than
	         4096 tokens remain, replace the leading 4096 tokens with a
	         summary obtained from ``call_openai_api``.
	      3. Build a persona from the remaining text with ``create_persona``
	         and query ``call_openai_api`` with that persona and the prompt.

	    Args:
	        file: Object whose ``name`` attribute is the path of the zip archive.
	        user_prompt: Prompt text forwarded to the final ``call_openai_api``
	            call.

	    Returns:
	        The value returned by the final ``call_openai_api`` call.
	    """
	    max_tokens = 4096
	    page_texts = []
	    # The context manager closes the archive even if a PDF fails to parse.
	    with zipfile.ZipFile(file.name, 'r') as archive:
	        for entry_name in archive.namelist():
	            # Case-insensitive match so entries such as "REPORT.PDF" are
	            # not silently skipped.
	            if not entry_name.lower().endswith('.pdf'):
	                continue
	            reader = PdfReader(io.BytesIO(archive.read(entry_name)))
	            page_texts.extend(page.extract_text() for page in reader.pages)
	    # Join once instead of growing a string page by page.
	    aggregated_text = ''.join(page_texts)
	    # Tokenize aggregated_text
	    tokens = nltk.word_tokenize(aggregated_text)
	    # The token list only shrinks when a summary is shorter than the
	    # max_tokens-sized chunk it replaces; the summary is then re-summarized
	    # together with the tokens that follow it on the next pass.
	    while len(tokens) > max_tokens:
	        chunk_text = ' '.join(tokens[:max_tokens])
	        # Use OpenAI API to summarize the chunk
	        summary = call_openai_api("a professional summarizer", f"Please summarize this text: {chunk_text}")
	        # Replace the original chunk with the summary
	        tokens = summary.split() + tokens[max_tokens:]
	    # Create a single persona from all text
	    persona = create_persona(' '.join(tokens))
	    # Answer the user's prompt in that persona
	    return call_openai_api(persona, user_prompt)



	# Input widgets: the uploaded zip archive and the free-text prompt.
	_zip_upload = gr.inputs.File(label="PDF File (Upload a Zip file containing ONLY PDF files)")
	_prompt_box = gr.inputs.Textbox(label="User Prompt (Enter a prompt to interact with your persona)")
	# Output widget that displays the value returned by pdf_to_text.
	_answer_box = gr.outputs.Textbox(label="Cognitive Agent Response")

	# Wire pdf_to_text to the widgets above.
	iface = gr.Interface(
	    fn=pdf_to_text,
	    inputs=[_zip_upload, _prompt_box],
	    outputs=_answer_box,
	    title="Ask An Expert Proof Of Concept",
	    description="This app extracts knowledge from the uploaded Zip files. The Cognitive Agent will use this data to build your unique persona.",
	)
	# Launch the interface with sharing disabled.
	iface.launch(share=False)