import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai

# nltk.word_tokenize needs the 'punkt' tokenizer data
nltk.download('punkt', quiet=True)

# Read the OpenAI API key from the 'OpenAPI' environment variable
openai.api_key = os.getenv('OpenAPI')


def pdf_to_text(file, user_prompt):
    texts = []
    with zipfile.ZipFile(file.name, 'r') as z:
        for filename in z.namelist():
            if not filename.endswith('.pdf'):
                continue

            # Extract the PDF from the zip and read it from memory
            pdf_file_io = io.BytesIO(z.read(filename))
            pdf = PdfReader(pdf_file_io)
            text = ''
            for page in pdf.pages:
                # extract_text() can return None for pages without extractable text
                text += page.extract_text() or ''

            # Tokenize the text; NLTK word tokens are only a rough proxy
            # for the model's own tokens
            tokens = nltk.word_tokenize(text)

            # Split long documents into chunks of at most 2000 tokens;
            # short documents are sent as-is
            if len(tokens) > 2000:
                chunks = [' '.join(tokens[i:i + 2000])
                          for i in range(0, len(tokens), 2000)]
            else:
                chunks = [text]

            # Send each chunk to the Chat Completions API
            # (pre-1.0 openai package interface)
            for chunk_str in chunks:
                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": user_prompt},
                        {"role": "user", "content": chunk_str},
                    ],
                )
                texts.append(response['choices'][0]['message']['content'])

    return '\n'.join(texts)


iface = gr.Interface(fn=pdf_to_text, inputs=["file", "text"], outputs="text")
iface.launch(share=False)
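
# Optional: a minimal sanity check for the chunking logic (a sketch, not part
# of the app; 'sample.txt' is a hypothetical local text file). Run it
# separately, e.g. in a REPL, since iface.launch() above blocks the process:
#
#   import nltk
#   nltk.download('punkt', quiet=True)
#   tokens = nltk.word_tokenize(open('sample.txt', encoding='utf-8').read())
#   chunks = [' '.join(tokens[i:i + 2000]) for i in range(0, len(tokens), 2000)]
#   print(len(tokens), 'tokens ->', len(chunks), 'chunk(s)')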