File size: 3,763 Bytes
ef24768
af98e35
bc5f544
d24775b
 
a59ef83
 
ea89a3e
 
ef24768
c84aaa1
ef24768
 
 
c84aaa1
fb4b26a
1299579
fb4b26a
c84aaa1
1299579
 
 
 
ef24768
1299579
 
ef24768
1299579
 
ef24768
 
 
 
 
 
 
 
 
 
 
 
 
 
e379c49
ef24768
 
 
 
 
 
 
 
 
65913cc
ef24768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbd1f0b
1299579
ef24768
 
 
 
 
 
 
 
 
1299579
 
 
 
ef24768
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from pymongo import MongoClient
# error since Jan 2024, from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings import OpenAIEmbeddings
# error since Jan 2024, from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
# error since Jan 2024, from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
# error since Jan 2024, from langchain.llms import OpenAI
from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA
import gradio as gr
from gradio.themes.base import Base
#import key_param
import os

def query_data(query,openai_api_key,mongo_uri):
    """Answer *query* two ways: raw Atlas Vector Search, and RAG via RetrievalQA.

    Parameters:
        query: natural-language question to answer.
        openai_api_key: OpenAI API key, used for both embeddings and the LLM.
        mongo_uri: MongoDB Atlas connection string.

    Returns:
        A tuple ``(as_output, retriever_output)``:
        * as_output — the text of the single most similar stored document
          ("" if the search returns nothing);
        * retriever_output — the answer generated by the RetrievalQA chain.
    """
    client = MongoClient(mongo_uri)
    try:
        dbName = "langchain_demo"
        collectionName = "collection_of_text_blobs"
        collection = client[dbName][collectionName]

        # Define the text embedding model.
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # Initialize the Vector Store on the Atlas Search index named "default".
        vectorStore = MongoDBAtlasVectorSearch( collection, embeddings, index_name="default" )

        # Convert question to vector using OpenAI embeddings and perform
        # Atlas Vector Search via Langchain's vectorStore.
        # BUG FIX: the keyword is lowercase `k`; the original `K=1` was not
        # recognized, so the intended top-1 limit was never applied.
        docs = vectorStore.similarity_search(query, k=1)
        # Guard against an empty result set instead of raising IndexError.
        as_output = docs[0].page_content if docs else ""

        # Define the LLM -- note that this is the Language Generation Model
        # and NOT an Embedding Model.
        llm = OpenAI(openai_api_key=openai_api_key, temperature=0, model_name='gpt-4-1106-preview')

        # Get VectorStoreRetriever: Specifically, Retriever for MongoDB VectorStore.
        # Implements _get_relevant_documents which retrieves documents relevant to a query.
        retriever = vectorStore.as_retriever()

        # Load "stuff" documents chain: takes a list of documents, inserts
        # them all into a prompt and passes that prompt to the LLM.
        # BUG FIX: removed the stray `model_name=` kwarg -- it is not a
        # RetrievalQA field (pydantic rejects unknown kwargs); the model is
        # already configured on `llm` above.
        qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

        # Execute the chain.
        retriever_output = qa.run(query)

        # Return Atlas Vector Search output, and output generated using RAG Architecture.
        return as_output, retriever_output
    finally:
        # Always release the MongoDB connection pool (was leaked per call).
        client.close()

# Build the Gradio web interface for the app.

with gr.Blocks(theme=Base(), title="Question Answering App using Vector Search + RAG") as demo:
    gr.Markdown(
        """
        # Question Answering App using Atlas Vector Search + RAG Architecture
        """)

    # Credential inputs (pre-filled with the expected prefixes).
    api_key_box = gr.Textbox(label="OpenAI 3.5 API Key", value="sk-", lines=1)
    uri_box = gr.Textbox(label="Mongo URI", value="mongodb+srv://", lines=1)

    # Question input.
    question_box = gr.Textbox(label="Enter your Question:")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")

    # Two outputs: raw vector-search hit, and the RAG-generated answer.
    with gr.Column():
        vs_output = gr.Textbox(lines=1, max_lines=10, label="Output with just Atlas Vector Search (returns text field as is):")
        rag_output = gr.Textbox(lines=1, max_lines=10, label="Output generated by chaining Atlas Vector Search to Langchain's RetrieverQA + OpenAI LLM:")

    # Wire the Submit button to the query function.
    submit_btn.click(
        query_data,
        inputs=[question_box, api_key_box, uri_box],
        outputs=[vs_output, rag_output],
    )

demo.launch()