import os

import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
import PyPDF2

api_key = os.environ["OPENAI_API_KEY"]
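# NOTE: the key is read from the environment, so OPENAI_API_KEY must be set
# (e.g. as a deployment/Space secret) before the app starts, or the line
# above raises a KeyError.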
# Walk through every uploaded PDF and return two parallel lists:
# the extracted text of each page, and a "filename_page_N" source label for it.
def read_and_textify(files):
    text_list = []
    sources_list = []
    for file in files:
        pdfReader = PyPDF2.PdfReader(file)
        for i in range(len(pdfReader.pages)):
            pageObj = pdfReader.pages[i]
            text = pageObj.extract_text()
            pageObj.clear()  # drop the parsed page data once the text is extracted
            text_list.append(text)
            sources_list.append(file.name + "_page_" + str(i))
    return [text_list, sources_list]
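# Shape of the return value for a hypothetical two-page upload "report.pdf":
#   [["text of page 1", "text of page 2"],
#    ["report.pdf_page_0", "report.pdf_page_1"]]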
st.set_page_config(layout="centered", page_title="Multidoc_QnA")
st.header("Multidoc_QnA")
st.write("---")
# file uploader; read_and_textify only parses PDFs, so restrict uploads to .pdf
uploaded_files = st.file_uploader("Upload documents", accept_multiple_files=True, type=["pdf"])
st.write("---")
# With accept_multiple_files=True the uploader returns a (possibly empty) list,
# never None, so test for emptiness rather than identity with None.
if not uploaded_files:
    st.info("Upload files to analyse.")
else:
    st.write(str(len(uploaded_files)) + " document(s) loaded..")

    documents, sources = read_and_textify(uploaded_files)
    # extract embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    # vector store with metadata: each page is stored under its "filename_page_N" source label
    vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])
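    # The "source" metadata key is what RetrievalQAWithSourcesChain reads to
    # build the "sources" field of its answer, so the labels produced by
    # read_and_textify come back verbatim in the response.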
    # choose the model
    model_name = "gpt-3.5-turbo"
    # model_name = "gpt-4"

    retriever = vStore.as_retriever()
    retriever.search_kwargs = {"k": 2}
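    # k controls how many of the most similar chunks the retriever hands to
    # the LLM per question; 2 keeps the prompt small but may miss context
    # that is spread across more pages.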
    # initialise the model
    llm = OpenAI(model_name=model_name, openai_api_key=api_key, streaming=True)
    model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
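    # chain_type="stuff" concatenates the k retrieved chunks into a single
    # prompt. NOTE: gpt-3.5-turbo is a chat model; newer LangChain releases
    # expect ChatOpenAI for it, while older ones redirect the OpenAI wrapper
    # and only emit a warning.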
st.header("Ask your data") | |
user_q = st.text_area("Enter your questions here") | |
if st.button("Get Response"): | |
try: | |
with st.spinner("Model is working on it..."): | |
result = model({"question":user_q}, return_only_outputs=True) | |
st.subheader('Your response:') | |
st.write(result['answer']) | |
st.subheader('Source pages:') | |
st.write(result['sources']) | |
except Exception as e: | |
st.error(f"An error occurred: {e}") | |
st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.') | |
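# ---------------------------------------------------------------------------
# Alternative Gradio-based implementation, kept below for reference only.
# It is entirely commented out and is not executed by this app.
# ---------------------------------------------------------------------------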
# import gradio as gr
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import PyPDFLoader
# import os
# import fitz
# from PIL import Image

# # Global variables
# COUNT, N = 0, 0
# chat_history = []
# chain = None  # Initialize chain as None

# # Read the OpenAI API key from the environment
# api_key = os.environ['OPENAI_API_KEY']
# # st.write(api_key)  # (never display the key in a real app)

# # Function to enable the API key input box
# # (enable_box is undefined here; this is an unfinished stub)
# def enable_api_box():
#     return enable_box

# # Function to add text to the chat history
# def add_text(history, text):
#     if not text:
#         raise gr.Error('Enter text')
#     history = history + [(text, '')]
#     return history

# # Function to process the PDF file and create a conversation chain
# def process_file(file):
#     global chain
#     if 'OPENAI_API_KEY' not in os.environ:
#         raise gr.Error('Upload your OpenAI API key')
#     # Replace with your actual PDF processing logic
#     loader = PyPDFLoader(file.name)
#     documents = loader.load()
#     embeddings = OpenAIEmbeddings()
#     pdfsearch = Chroma.from_documents(documents, embeddings)
#     chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
#                                                   retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
#                                                   return_source_documents=True)
#     return chain

# # Function to generate a response based on the chat history and query
# def generate_response(history, query, pdf_upload):
#     global COUNT, N, chat_history, chain
#     if not pdf_upload:
#         raise gr.Error(message='Upload a PDF')
#     if COUNT == 0:
#         chain = process_file(pdf_upload)
#         COUNT += 1
#     # Replace with your LangChain logic to generate a response
#     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
#     chat_history += [(query, result["answer"])]
#     N = list(result['source_documents'][0])[1][1]['page']  # Adjust as needed
#     for char in result['answer']:
#         history[-1][-1] += char
#     return history, ''

# # Function to render a specific page of a PDF file as an image
# def render_file(file):
#     global N
#     doc = fitz.open(file.name)
#     page = doc[N]
#     pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
#     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
#     return image

# # Function to render initial content from the PDF
# def render_first(pdf_file):
#     # Replace with logic to process the PDF and generate an initial image
#     image = Image.new('RGB', (600, 400), color='white')  # Placeholder
#     return image

# # Streamlit & Gradio Interface
# st.title("PDF-Powered Chatbot")

# with st.container():
#     gr.Markdown("""
#         <style>
#         .image-container { height: 680px; }
#         </style>
#     """)

#     with gr.Blocks() as demo:
#         pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"])  # Define pdf_upload1
#         # ... (rest of your interface creation)
#         txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
#         submit_btn = gr.Button('Submit')

#         @submit_btn.click()
#         def on_submit():
#             add_text(chatbot, txt)
#             generate_response(chatbot, txt, pdf_upload1)  # Use pdf_upload1 here
#             render_file(pdf_upload1)  # Use pdf_upload1 here

# if __name__ == "__main__":
#     gr.Interface(
#         fn=generate_response,
#         inputs=[
#             "file",  # Define pdf_upload1
#             "text",  # Define chatbot output
#             "text"   # Define txt
#         ],
#         outputs=[
#             "image",  # Define show_img
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         title="PDF-Powered Chatbot"
#     ).launch()
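# To run the active Streamlit app locally (a minimal sketch, assuming this
# file is saved as app.py and streamlit/langchain/chromadb/PyPDF2 are installed):
#   export OPENAI_API_KEY=sk-...   # your real key
#   streamlit run app.py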