# PDF question-answering chat app: LangChain ConversationalRetrievalChain
# over a persisted Chroma index, served through a Gradio interface.
from dotenv import load_dotenv

# Load environment variables (e.g. OPENAI_API_KEY) from the .env file
# before any OpenAI-backed class is instantiated.
load_dotenv()

from langchain.embeddings import OpenAIEmbeddings  # for creating embeddings
from langchain.vectorstores import Chroma  # for the vectorization part
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI  # completion-style wrapper (kept for compatibility)
from langchain.chat_models import ChatOpenAI  # chat-completions wrapper (ChatGPT)
import gradio as gr

# Number of source-document slots in the UI; chat_pdf pads its output to
# exactly this many sources.
max_sources = 4
# Directory holding the persisted Chroma vector index.
DB_DIR = "chroma"

embedding = OpenAIEmbeddings()
vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embedding)

# gpt-3.5-turbo is a chat-completions model, so it must go through
# ChatOpenAI; the completion-style OpenAI wrapper rejects chat models at
# request time ("this is a chat model...").
pdf_qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.9, model_name="gpt-3.5-turbo"),
    vectordb.as_retriever(),
    return_source_documents=True,
)
def chat_pdf(query, chat_history=""):
    """Answer *query* against the indexed PDFs.

    Returns a flat list sized for the Gradio interface: the answer
    followed by exactly ``max_sources`` source-document strings (empty
    strings pad out unused slots).
    """
    # The chain expects chat_history as a list of (question, answer)
    # tuples; tolerate the legacy ""/None default by normalizing to [].
    history = chat_history if chat_history else []
    result = pdf_qa({"question": query, "chat_history": history})
    answer = result["answer"]
    source_docs = result["source_documents"]
    print("source_docs", len(source_docs))

    # Prefix each retrieved chunk with its metadata so the UI shows
    # provenance. Truncate to max_sources so the output count can never
    # exceed the number of Gradio output components.
    cleaned_docs = [
        f"Metadata: {doc.metadata}\n" + doc.page_content
        for doc in source_docs[:max_sources]
    ]
    # Pad the outputs to match the number of output components in the
    # Gradio interface.
    return [answer] + cleaned_docs + [""] * (max_sources - len(cleaned_docs))
def create_outputs(num_sources):
    """Build the Gradio output widgets: one answer box followed by one
    textbox per source document (labeled 1..num_sources)."""
    answer_box = gr.outputs.Textbox(label="Answer")
    source_boxes = [
        gr.outputs.Textbox(label=f"Source Document {idx}")
        for idx in range(1, num_sources + 1)
    ]
    return [answer_box] + source_boxes
# Wire the QA function into a simple web UI: one query textbox in, the
# answer plus `max_sources` source-document textboxes out.
example_queries = [
    ["Give 2 species of fulgoroidea"],
    ["What colors are found among fulgoroidea?"],
    ["Why are fulgoroidea so cute?"],
]
iface = gr.Interface(
    fn=chat_pdf,
    inputs=[gr.inputs.Textbox(label="Query")],
    outputs=create_outputs(max_sources),
    examples=example_queries,
)
iface.launch(debug=True)