import os

import streamlit as st
import PyPDF2
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

api_key = os.environ["OPENAI_API_KEY"]


def read_and_textify(files):
    """Go through each uploaded PDF and return two parallel lists:
    the extracted text of every page, and a "<filename>_page_<n>"
    source label for each page (used later as retrieval metadata)."""
    text_list = []
    sources_list = []
    for file in files:
        pdfReader = PyPDF2.PdfReader(file)
        #print("Page Number:", len(pdfReader.pages))
        for i in range(len(pdfReader.pages)):
            pageObj = pdfReader.pages[i]
            text = pageObj.extract_text()
            # extract_text() can return None for image-only pages
            text_list.append(text or "")
            sources_list.append(file.name + "_page_" + str(i))
    return text_list, sources_list


st.set_page_config(layout="centered", page_title="Multidoc_QnA")
st.header("Multidoc_QnA")
st.write("---")

# File uploader
uploaded_files = st.file_uploader("Upload documents", accept_multiple_files=True, type=["txt", "pdf"])
st.write("---")

# With accept_multiple_files=True, st.file_uploader returns an empty list
# (not None) while nothing has been uploaded yet.
if not uploaded_files:
    st.info("Upload files to analyse.")
else:
    st.write(str(len(uploaded_files)) + " document(s) loaded..")

    documents, sources = read_and_textify(uploaded_files)

    # Extract embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    # Vector store with metadata. Here we will store page numbers.
    vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])

    # Deciding model
    model_name = "gpt-3.5-turbo"
    # model_name = "gpt-4"

    retriever = vStore.as_retriever(search_kwargs={"k": 2})

    # Initiate model
    llm = OpenAI(model_name=model_name, openai_api_key=api_key, streaming=True)
    model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    st.header("Ask your data")
    user_q = st.text_area("Enter your questions here")

    if st.button("Get Response"):
        try:
            with st.spinner("Model is working on it..."):
                result = model({"question": user_q}, return_only_outputs=True)
            st.subheader("Your response:")
            st.write(result["answer"])
            st.subheader("Source pages:")
            st.write(result["sources"])
        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error("Oops, the GPT response resulted in an error :( Please try again with a different question.")
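# ---------------------------------------------------------------------------
# The block below is an earlier, commented-out draft of the same idea built
# with Gradio instead of Streamlit: it loads a single PDF with PyPDFLoader,
# answers questions through a ConversationalRetrievalChain, and renders the
# source page as an image with PyMuPDF (fitz). It is kept for reference only
# and is never executed.
# ---------------------------------------------------------------------------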
# import gradio as gr
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import PyPDFLoader
# import os
# import fitz
# from PIL import Image
#
# # Global variables
# COUNT, N = 0, 0
# chat_history = []
# chain = None  # Initialize chain as None
#
# # Function to set the OpenAI API key
# api_key = os.environ['OPENAI_API_KEY']
# st.write(api_key)
#
# # Function to enable the API key input box
# def enable_api_box():
#     return enable_box
#
# # Function to add text to the chat history
# def add_text(history, text):
#     if not text:
#         raise gr.Error('Enter text')
#     history = history + [(text, '')]
#     return history
#
# # Function to process the PDF file and create a conversation chain
# def process_file(file):
#     global chain
#     if 'OPENAI_API_KEY' not in os.environ:
#         raise gr.Error('Upload your OpenAI API key')
#     # Replace with your actual PDF processing logic
#     loader = PyPDFLoader(file.name)
#     documents = loader.load()
#     embeddings = OpenAIEmbeddings()
#     pdfsearch = Chroma.from_documents(documents, embeddings)
#     chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
#                                                   retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
#                                                   return_source_documents=True)
#     return chain
#
# # Function to generate a response based on the chat history and query
# def generate_response(history, query, pdf_upload):
#     global COUNT, N, chat_history, chain
#     if not pdf_upload:
#         raise gr.Error(message='Upload a PDF')
#     if COUNT == 0:
#         chain = process_file(pdf_upload)
#         COUNT += 1
#     # Replace with your LangChain logic to generate a response
#     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
#     chat_history += [(query, result["answer"])]
#     N = list(result['source_documents'][0])[1][1]['page']  # Adjust as needed
#     for char in result['answer']:
#         history[-1][-1] += char
#     return history, ''
#
# # Function to render a specific page of a PDF file as an image
# def render_file(file):
#     global N
#     doc = fitz.open(file.name)
#     page = doc[N]
#     pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
#     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
#     return image
#
# # Function to render initial content from the PDF
# def render_first(pdf_file):
#     # Replace with logic to process the PDF and generate an initial image
#     image = Image.new('RGB', (600, 400), color='white')  # Placeholder
#     return image
#
# # Streamlit & Gradio Interface
# st.title("PDF-Powered Chatbot")
#
# with st.container():
#     gr.Markdown("""
#
#     """)
#
# with gr.Blocks() as demo:
#     pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"])  # Define pdf_upload1
#     # ... (rest of your interface creation)
#     txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
#     submit_btn = gr.Button('Submit')
#
#     @submit_btn.click()
#     def on_submit():
#         add_text(chatbot, txt)
#         generate_response(chatbot, txt, pdf_upload1)  # Use pdf_upload1 here
#         render_file(pdf_upload1)  # Use pdf_upload1 here
#
# if __name__ == "__main__":
#     gr.Interface(
#         fn=generate_response,
#         inputs=[
#             "file",   # Define pdf_upload1
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         outputs=[
#             "image",  # Define show_img
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         title="PDF-Powered Chatbot"
#     ).launch()
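# ---------------------------------------------------------------------------
# Usage sketch for the active Streamlit app above. This assumes the script is
# saved as multidoc_qna.py (the filename is hypothetical) and that streamlit,
# langchain, chromadb, openai, and PyPDF2 are installed:
#
#   export OPENAI_API_KEY="sk-..."
#   streamlit run multidoc_qna.py
# ---------------------------------------------------------------------------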