Spaces:
Sleeping
Sleeping
File size: 2,795 Bytes
5f07ce9 47125e3 0b87fda 5f07ce9 dc5e316 5f07ce9 5672db4 5f07ce9 dc5e316 5f07ce9 dc5e316 5f07ce9 dc5e316 5f07ce9 dc5e316 593d86e dc5e316 593d86e dc5e316 593d86e dc5e316 fd8442d dc5e316 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
import pip
import subprocess
import sys
# Install the project's dependencies with the same interpreter that is
# running this script; check_call raises CalledProcessError if pip fails.
# NOTE(review): the third-party imports at the top of this file execute
# before this line, so a missing package would already have failed there --
# confirm whether this install step is still needed.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
)

# Fetch the NLTK data packages: 'punkt' first, then the complete 'all'
# collection (which is a large download).
nltk.download('punkt')
nltk.download('all')

# The OpenAI API key is read from the 'OpenAPI' environment variable;
# the value is None when that variable is not set.
openai.api_key = os.environ.get('OpenAPI')
def call_openai_api(prompt, max_retries=3, retry_delay=1, model="gpt-3.5-turbo"):
    """Send *prompt* to the OpenAI chat completion API and return the reply text.

    The request is retried when it raises, pausing ``retry_delay`` seconds
    between attempts.

    Args:
        prompt: Text sent as the user message.
        max_retries: Total number of attempts. Values below 1 are treated
            as 1 so the function always returns a string.
        retry_delay: Seconds to sleep between failed attempts.
        model: Name of the chat model to query.

    Returns:
        The content of the first choice's message on success. If every
        attempt fails, the string form of the last exception is returned
        instead of raising, so the caller can display it directly.
    """
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: any failure is either retried or, on the
            # final attempt, reported back to the caller as text.
            if attempt == attempts - 1:
                return str(e)
            time.sleep(retry_delay)
def pdf_to_text(file, user_prompt):
    """Run every PDF inside an uploaded zip archive through the OpenAI API.

    Each archive member whose name ends in ``.pdf`` (case-insensitive) is
    read, its page text is extracted and tokenized, and the text is sent to
    ``call_openai_api``. Documents longer than 2000 tokens are sent in
    consecutive 2000-token chunks, one request per chunk.

    Args:
        file: Uploaded file object; its ``name`` attribute is opened as the
            zip archive.
        user_prompt: Instruction placed in front of the document text in
            every request. When empty or None, the document text is sent
            on its own.

    Returns:
        All API responses joined with newlines, in archive order.
    """
    chunk_size = 2000  # maximum number of tokens sent in one request
    instruction = (user_prompt or '').strip()

    def build_prompt(content):
        # Prefix the user's instruction so it actually guides the response.
        if instruction:
            return instruction + "\n\n" + content
        return content

    texts = []
    # The context manager closes the archive even if a PDF fails to parse.
    with zipfile.ZipFile(file.name, 'r') as z:
        for filename in z.namelist():
            if not filename.lower().endswith('.pdf'):
                continue
            pdf = PdfReader(io.BytesIO(z.read(filename)))
            # Guard against pages that yield no text (None) so the
            # concatenation cannot fail.
            text = ''.join(page.extract_text() or '' for page in pdf.pages)
            tokens = nltk.word_tokenize(text)
            if len(tokens) > chunk_size:
                for start in range(0, len(tokens), chunk_size):
                    chunk_str = ' '.join(tokens[start:start + chunk_size])
                    texts.append(call_openai_api(build_prompt(chunk_str)))
            else:
                # Short documents are sent as the original, untokenized text.
                texts.append(call_openai_api(build_prompt(text)))
    return '\n'.join(texts)
# Build the Gradio UI: a zip upload and a prompt box feed pdf_to_text, and
# its return value is shown in a single text box.
zip_upload = gr.inputs.File(
    label="PDF File (Upload a Zip file containing ONLY PDF files)"
)
prompt_box = gr.inputs.Textbox(
    label="User Prompt (Enter a prompt to guide the AI's responses)"
)
response_box = gr.outputs.Textbox(label="Cognitive Agent Response")

iface = gr.Interface(
    fn=pdf_to_text,
    inputs=[zip_upload, prompt_box],
    outputs=response_box,
    title="PDF Text Extractor",
    description="This app extracts knowledge from the uploaded Zip files. Using a Cognitive Agent you can interact with that knowledge.",
)

# Start the app without requesting a public share link.
iface.launch(share=False)
|