File size: 3,519 Bytes
0a6ed15
 
 
 
 
 
 
 
 
 
946b41d
c67883e
 
cf293a5
8483ca7
8c7e036
c469b78
3d0b71c
8c7e036
 
0a6ed15
812e3d9
 
c67883e
8c7e036
812e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946b41d
812e3d9
946b41d
812e3d9
946b41d
 
 
812e3d9
 
 
 
 
 
 
 
946b41d
812e3d9
946b41d
 
812e3d9
946b41d
812e3d9
946b41d
 
 
f9a1a72
946b41d
 
f9a1a72
946b41d
 
 
 
 
 
812e3d9
946b41d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
import bs4
import torch
import getpass
 
# Streamlit UI — page title shown regardless of whether keys are entered yet.
st.title("Blog Retrieval and Question Answering")

# Prompt the user to enter their Langchain API key
# (used only for pulling the RAG prompt from the LangChain hub below).
api_key_langchain = st.text_input("Enter your LANGCHAIN_API_KEY", type="password")

# Prompt the user to enter their Groq API key
# (used to authenticate the ChatGroq LLM client).
api_key_Groq = st.text_input("Enter your Groq_API_KEY", type="password")

# Check if both API keys have been provided.
# NOTE: everything below (model setup, indexing, QA UI) runs inside this
# else-branch, so the app does nothing until both keys are present.
if not api_key_langchain or not api_key_Groq:
    st.write("Please enter both API keys if you want to access this APP.")
else:
    st.write("Both API keys are set.")

    # Initialize the LLM with the provided Groq API key.
    # Imported here (not at top of file) so the import only happens once
    # keys are supplied.
    from langchain_groq import ChatGroq
    llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)

    # Define the embedding class
    class SentenceTransformerEmbedding:
        """Adapter exposing the embed_documents/embed_query interface that
        LangChain's Chroma integration expects, backed by a local
        sentence-transformers model."""

        def __init__(self, model_name):
            # Loads (and, on first use, downloads) the named model.
            self.model = SentenceTransformer(model_name)

        @staticmethod
        def _to_plain(encoded):
            # Torch tensors are detached, moved to CPU and converted to
            # nested Python lists; any other return type passes through.
            if isinstance(encoded, torch.Tensor):
                return encoded.cpu().detach().numpy().tolist()
            return encoded

        def embed_documents(self, texts):
            """Return one embedding (list of floats) per input text."""
            return self._to_plain(self.model.encode(texts, convert_to_tensor=True))

        def embed_query(self, query):
            """Return a single embedding for one query string."""
            return self._to_plain(self.model.encode([query], convert_to_tensor=True))[0]

    # Initialize the embedding model used for both indexing and querying.
    embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')

    # Load, chunk, and index the contents of the blog.
    # Cached so the post is scraped, split and embedded only ONCE per session
    # instead of on every Streamlit rerun (i.e. every widget interaction),
    # which previously re-downloaded and re-embedded the whole post each time.
    @st.cache_resource(show_spinner="Indexing blog post...")
    def load_data():
        """Scrape the blog post, split it into chunks, and index them.

        Returns:
            Chroma: an in-memory vector store over the post's chunks.
        """
        loader = WebBaseLoader(
            web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
            bs_kwargs=dict(
                # Only parse the post body/title/header; skips nav, sidebar, etc.
                parse_only=bs4.SoupStrainer(
                    class_=("post-content", "post-title", "post-header")
                )
            ),
        )
        docs = loader.load()
        # 1000-char chunks with 200-char overlap preserve context across
        # chunk boundaries for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(docs)
        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
        return vectorstore

    vectorstore = load_data()

    # Question-answering UI. (The page title was already rendered at the top
    # of the script; the duplicate st.title() call that showed it twice has
    # been removed.)
    question = st.text_input("Enter your question:")

    if question:
        retriever = vectorstore.as_retriever()
        # Pull the standard RAG prompt from the LangChain hub, authenticated
        # with the user's LangChain API key.
        prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)

        def format_docs(docs):
            # Concatenate retrieved chunks into one context string.
            return "\n\n".join(doc.page_content for doc in docs)

        # RAG pipeline: retrieve -> format context -> fill prompt -> LLM -> text.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        try:
            result = rag_chain.invoke(question)
            st.write("Answer:", result)
        except Exception as e:
            # Surface retrieval/LLM failures in the UI instead of crashing
            # the Streamlit script.
            st.error(f"An error occurred: {e}")