Spaces:
Sleeping
Sleeping
File size: 3,649 Bytes
ef24768 8b0f111 42c08ad 8b0f111 ef24768 c84aaa1 ef24768 c84aaa1 fb4b26a 42c08ad c84aaa1 1299579 ef24768 1299579 ef24768 1299579 ef24768 2de5f29 ef24768 2de5f29 ef24768 2de5f29 ef24768 2de5f29 ef24768 2fa7d52 ef24768 2de5f29 ef24768 1299579 ef24768 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
from pymongo import MongoClient
# error since Jan 2024, from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
# error since Jan 2024, from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
# error since Jan 2024, from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
# error since Jan 2024, from langchain.llms import OpenAI
from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA
import gradio as gr
from gradio.themes.base import Base
#import key_param
import os
def query_data(query,openai_api_key,mongo_uri):
os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["MONGO_URI"] = mongo_uri
client = MongoClient(mongo_uri)
dbName = "langchain_demo"
collectionName = "collection_of_text_blobs"
collection = client[dbName][collectionName]
# Define the text embedding model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Initialize the Vector Store
vectorStore = MongoDBAtlasVectorSearch( collection, embeddings, index_name="default" )
# Convert question to vector using OpenAI embeddings
# Perform Atlas Vector Search using Langchain's vectorStore
# similarity_search returns MongoDB documents most similar to the query
docs = vectorStore.similarity_search(query, K=1)
as_output = docs[0].page_content
# Leveraging Atlas Vector Search paired with Langchain's QARetriever
# Define the LLM that we want to use -- note that this is the Language Generation Model and NOT an Embedding Model
# If it's not specified (for example like in the code below),
# then the default OpenAI model used in LangChain is OpenAI GPT-3.5-turbo, as of August 30, 2023
llm = OpenAI(openai_api_key=openai_api_key, temperature=0)
# Get VectorStoreRetriever: Specifically, Retriever for MongoDB VectorStore.
# Implements _get_relevant_documents which retrieves documents relevant to a query.
retriever = vectorStore.as_retriever()
# Load "stuff" documents chain. Stuff documents chain takes a list of documents,
# inserts them all into a prompt and passes that prompt to an LLM.
qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)
# Execute the chain
retriever_output = qa.run(query)
# Return Atlas Vector Search output, and output generated using RAG Architecture
return as_output, retriever_output
# Create a web interface for the app, using Gradio
with gr.Blocks(theme=Base(), title="MongoDB Atlas Vector Search + RAG Architecture") as demo:
gr.Markdown(
"""
# MongoDB Atlas Vector Search + RAG Architecture
""")
openai_api_key = gr.Textbox(label = "OpenAI API Key (sk-...)", type = "password", lines = 1)
mongo_uri = gr.Textbox(label = "Mongo Atlas URI (mongodb+srv://..)", type = "password", lines = 1)
textbox = gr.Textbox(label="Enter your Question:")
with gr.Row():
button = gr.Button("Submit", variant="primary")
with gr.Column():
output1 = gr.Textbox(lines=1, max_lines=10, label="Atlas Vector Search output (document field as is):")
output2 = gr.Textbox(lines=1, max_lines=10, label="Atlas Vector Search output + Langchain's RetrieverQA + OpenAI LLM:")
# Call query_data function upon clicking the Submit button
button.click(query_data,
inputs=[textbox, openai_api_key, mongo_uri],
outputs=[output1, output2]
)
demo.launch()
|