from pymongo import MongoClient
from langchain.embeddings.openai import OpenAIEmbeddings
# raises ImportError since Jan 2024 (langchain 0.1); old path was: from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
# raises ImportError since Jan 2024 (langchain 0.1); old path was: from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
# raises ImportError since Jan 2024 (langchain 0.1); old path was: from langchain.llms import OpenAI
from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA
import gradio as gr
from gradio.themes.base import Base
#import key_param
import os
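
# DirectoryLoader is imported for the ingestion side of this demo, which runs
# separately from the app. A minimal sketch of that step, assuming a local
# ./sample_files directory of .txt files (a hypothetical path) and the same
# database/collection names used in query_data below:
#
#   loader = DirectoryLoader("./sample_files", glob="./*.txt", show_progress=True)
#   data = loader.load()
#   collection = MongoClient(mongo_uri)["langchain_demo"]["collection_of_text_blobs"]
#   MongoDBAtlasVectorSearch.from_documents(
#       data, OpenAIEmbeddings(openai_api_key=openai_api_key), collection=collection
#   )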
def query_data(query, openai_api_key, mongo_uri):
    # openai_api_key = os.getenv("OPENAI_API_KEY")
    # mongo_uri = os.getenv("MONGO_URI")
    client = MongoClient(mongo_uri)
    dbName = "langchain_demo"
    collectionName = "collection_of_text_blobs"
    collection = client[dbName][collectionName]

    # Define the text embedding model
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Initialize the vector store
    vectorStore = MongoDBAtlasVectorSearch(collection, embeddings, index_name="default")
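
    # The index_name="default" above refers to an Atlas Vector Search index that
    # must already exist on the collection. A typical definition for this demo,
    # assuming OpenAI's 1536-dimension text-embedding-ada-002 vectors in an
    # "embedding" field, looks like:
    #
    #   {
    #     "mappings": {
    #       "dynamic": true,
    #       "fields": {
    #         "embedding": { "type": "knnVector", "dimensions": 1536, "similarity": "cosine" }
    #       }
    #     }
    #   }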
    # Convert the question to a vector using OpenAI embeddings and perform an
    # Atlas Vector Search through LangChain's vector store.
    # similarity_search returns the MongoDB documents most similar to the query;
    # note that the number of results is the lowercase keyword k, not K.
    docs = vectorStore.similarity_search(query, k=1)
    as_output = docs[0].page_content

    # Leverage Atlas Vector Search paired with LangChain's RetrievalQA.
    # Define the LLM we want to use -- note that this is the language *generation*
    # model, NOT an embedding model. If no model is specified, as here, LangChain's
    # OpenAI wrapper falls back to its default completion model
    # (gpt-3.5-turbo-instruct in current releases).
    llm = OpenAI(openai_api_key=openai_api_key, temperature=0)

    # Get a VectorStoreRetriever: specifically, a retriever for the MongoDB vector
    # store. It implements _get_relevant_documents, which retrieves the documents
    # relevant to a query.
    retriever = vectorStore.as_retriever()

    # Load a "stuff" documents chain: it takes the retrieved documents, inserts
    # them all into a single prompt, and passes that prompt to the LLM. Note that
    # from_chain_type does not accept a model_name kwarg; pick the model on the
    # LLM itself (e.g. OpenAI(model_name=...)) instead.
    qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

    # Execute the chain
    retriever_output = qa.run(query)

    # Return the raw Atlas Vector Search output, and the output generated using
    # the RAG architecture
    return as_output, retriever_output
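
# A minimal sketch of calling query_data directly (with hypothetical placeholder
# values), e.g. for testing without the Gradio UI:
#
#   vs_answer, rag_answer = query_data(
#       "What is Atlas Vector Search?",
#       openai_api_key="sk-...",
#       mongo_uri="mongodb+srv://...",
#   )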
# Create a web interface for the app, using Gradio
with gr.Blocks(theme=Base(), title="Question Answering App using Vector Search + RAG") as demo:
    gr.Markdown(
        """
        # Question Answering App using Atlas Vector Search + RAG Architecture
        """)
    openai_api_key = gr.Textbox(label="OpenAI API Key", value="sk-", lines=1)
    mongo_uri = gr.Textbox(label="Mongo URI", value="mongodb+srv://", lines=1)
    textbox = gr.Textbox(label="Enter your Question:")
    with gr.Row():
        button = gr.Button("Submit", variant="primary")
    with gr.Column():
        output1 = gr.Textbox(lines=1, max_lines=10, label="Output with just Atlas Vector Search (returns the text field as is):")
        output2 = gr.Textbox(lines=1, max_lines=10, label="Output generated by chaining Atlas Vector Search to LangChain's RetrievalQA + OpenAI LLM:")

    # Call the query_data function when the Submit button is clicked
    button.click(query_data,
                 inputs=[textbox, openai_api_key, mongo_uri],
                 outputs=[output1, output2]
                 )

demo.launch()