"""Gradio proof of concept: build a persona from zipped PDFs and query it.

The user uploads a Zip archive of PDF files together with a prompt. The text
of every PDF in the archive is extracted and truncated, a persona is generated
from it with the OpenAI chat API, and the prompt is then answered in the voice
of that persona.
"""

import io
import os
import subprocess
import sys
import time
import zipfile

import gradio as gr
import nltk
import openai
import pip
from PyPDF2 import PdfReader

# install required libraries
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
)

# download required NLTK data packages
nltk.download('punkt')

# The OpenAI API key is read from the `OpenAPI` environment variable.
openai.api_key = os.getenv('OpenAPI')

# Chat model used for both persona creation and question answering.
CHAT_MODEL = "gpt-3.5-turbo"

# Number of attempts made for each chat request before giving up.
MAX_RETRIES = 5

# Seconds to wait between two attempts.
RETRY_DELAY_SECONDS = 1

# Upper bound on the number of NLTK word tokens sent to build the persona.
MAX_PERSONA_TOKENS = 4096


def _chat_completion_with_retries(messages, max_retries=MAX_RETRIES):
    """Send `messages` to the chat model, retrying on any failure.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts passed to
            ``openai.ChatCompletion.create``.
        max_retries: Total number of attempts.

    Returns:
        The content of the first choice on success. If every attempt fails,
        the string form of the last exception is returned instead of raising,
        so the error text ends up in the UI output.
    """
    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model=CHAT_MODEL,
                messages=messages,
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            if attempt < max_retries - 1:  # not the last attempt
                time.sleep(RETRY_DELAY_SECONDS)
                continue
            # Last attempt: report the failure as text rather than raising.
            return str(e)


def create_persona(text):
    """Ask the chat model to create a persona based on `text`.

    Args:
        text: Source text the persona is derived from.

    Returns:
        The model's reply, or the exception message if all retries failed.
    """
    return _chat_completion_with_retries([
        {"role": "system", "content": "You are a knowledgeable assistant."},
        {"role": "user",
         "content": f"Create a persona based on this text: {text}"},
    ])


def call_openai_api(persona, user_prompt):
    """Answer `user_prompt` with the chat model acting as `persona`.

    Args:
        persona: Persona description, as returned by `create_persona`.
        user_prompt: The question or instruction entered by the user.

    Returns:
        The model's reply, or the exception message if all retries failed.
    """
    # The instruction text is split across literals only for line length;
    # the concatenated result is the exact prompt sent to the model.
    user_message = (
        "Ignore all previous instructions. "
        f"As a Cognitive AI Agent your persona is:{persona} "
        "You will answer only as an expert in cognitive workplace "
        "and Humancentric leadership. "
        "All answers must relate to HumanCentric services "
        "or cognitive workplace. \n"
        f"{user_prompt}"
    )
    return _chat_completion_with_retries([
        {"role": "system", "content": f"You are {persona}"},
        {"role": "user", "content": user_message},
    ])


def pdf_to_text(file, user_prompt):
    """Build a persona from the PDFs in a Zip upload and answer a prompt.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the
            path of the Zip archive to open.
        user_prompt: Prompt to answer using the generated persona.

    Returns:
        The model's answer, an exception message if the API calls failed,
        or a short notice when no file was uploaded.
    """
    if file is None:
        # Without this guard, `file.name` raises AttributeError.
        return "Please upload a Zip file containing PDF files."

    page_texts = []
    # The context manager closes the archive even if a PDF fails to parse.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for filename in archive.namelist():
            # Case-insensitive so entries such as "REPORT.PDF" are included.
            if not filename.lower().endswith('.pdf'):
                continue
            pdf = PdfReader(io.BytesIO(archive.read(filename)))
            page_texts.extend(page.extract_text() for page in pdf.pages)
    aggregated_text = ''.join(page_texts)

    # Tokenize and keep only the first MAX_PERSONA_TOKENS tokens; anything
    # beyond that is dropped rather than processed in further chunks.
    tokens = nltk.word_tokenize(aggregated_text)[:MAX_PERSONA_TOKENS]

    # Create a single persona from all text
    persona = create_persona(' '.join(tokens))

    # Answer the user's prompt as that persona
    return call_openai_api(persona, user_prompt)


iface = gr.Interface(
    fn=pdf_to_text,
    inputs=[
        gr.inputs.File(
            label="PDF File (Upload a Zip file containing ONLY PDF files)"
        ),
        gr.inputs.Textbox(
            label="User Prompt (Enter a prompt to interact with your persona)"
        ),
    ],
    outputs=gr.outputs.Textbox(label="Cognitive Agent Response"),
    title="Ask An Expert Proof Of Concept",
    description="This app extracts knowledge from the uploaded Zip files. The Cognitive Agent will use this data to build your unique persona.",
)

iface.launch(share=False)