File size: 3,459 Bytes
0a6ed15
 
 
 
 
 
 
 
 
 
8483ca7
8c7e036
c469b78
3d0b71c
8c7e036
 
0a6ed15
812e3d9
 
f8730ec
8c7e036
812e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9a1a72
812e3d9
f9a1a72
812e3d9
f9a1a72
812e3d9
 
 
 
 
 
 
 
 
f9a1a72
812e3d9
f9a1a72
 
812e3d9
f9a1a72
 
812e3d9
f9a1a72
812e3d9
f9a1a72
 
 
 
 
 
 
 
 
 
 
 
 
812e3d9
f9a1a72
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
import bs4
import torch

# Ask for the LangChain API key; type="password" is passed to the input widget
api_key_langchain = st.text_input(label="Enter your LANGCHAIN_API_KEY", type="password")

# Ask for the Groq API key the same way
api_key_Groq = st.text_input(label="Enter your Groq_API_KEY", type="password")

# Everything below the gate runs only once both keys are non-empty
if not (api_key_langchain and api_key_Groq):
    st.write("Please enter both API keys if you want to access this app.")
else:
    st.write("Both API keys are set.")

    # Build the Groq chat model with the key the user supplied
    from langchain_groq import ChatGroq
    llm = ChatGroq(groq_api_key=api_key_Groq, model="llama3-8b-8192")

    # Define the embedding class
    class SentenceTransformerEmbedding:
        """Adapter exposing a SentenceTransformer via ``embed_documents`` / ``embed_query``.

        Both methods always return plain Python lists of floats, whatever
        container type the underlying ``encode`` call hands back.
        """

        def __init__(self, model_name):
            """Load the sentence-transformers model identified by *model_name*."""
            self.model = SentenceTransformer(model_name)

        def _to_list(self, values):
            """Convert a tensor, array, or sequence of those into nested Python lists."""
            if isinstance(values, torch.Tensor):
                return values.cpu().detach().numpy().tolist()
            if hasattr(values, "tolist"):
                # numpy arrays (and numpy scalars) convert themselves.
                return values.tolist()
            if isinstance(values, (list, tuple)):
                return [self._to_list(item) for item in values]
            return values

        def embed_documents(self, texts):
            """Embed every text in *texts*.

            Args:
                texts: Sequence of strings to embed.

            Returns:
                One list of floats per input text, in input order; ``[]`` when
                *texts* is empty.
            """
            if not texts:
                # Nothing to encode; skip the model call entirely.
                return []
            embeddings = self.model.encode(texts, convert_to_tensor=True)
            # Normalise the result so a non-tensor return value is still
            # delivered as lists rather than passed through unchanged.
            return self._to_list(embeddings)

        def embed_query(self, query):
            """Embed a single *query* string and return its vector as a list of floats."""
            # Reuse the batch path so both methods share one conversion rule.
            return self.embed_documents([query])[0]

    # Initialize the embedding class.
    # Streamlit re-executes this script on every widget interaction, so the
    # model is created through st.cache_resource and reused across reruns
    # instead of being loaded again each time.
    @st.cache_resource
    def _load_embedding_model(model_name):
        """Create the embedding wrapper for *model_name* (cached across reruns)."""
        return SentenceTransformerEmbedding(model_name)

    embedding_model = _load_embedding_model('all-MiniLM-L6-v2')

    # Load, chunk, and index the contents of the blog
    @st.cache_resource
    def load_data(url, chunk_size=1000, chunk_overlap=200):
        """Fetch *url*, split its text into chunks, and index them in Chroma.

        The result is cached per argument set with ``st.cache_resource``:
        Streamlit re-runs the whole script on each widget interaction, and
        without the cache every rerun would download and embed the page again.

        Args:
            url: Address of the web page to index.
            chunk_size: Maximum size of each text chunk passed to the splitter.
            chunk_overlap: Overlap between consecutive chunks.

        Returns:
            The Chroma vector store built from the page's chunks.
        """
        # Function-scope import: only this block needs hashlib.
        import hashlib

        loader = WebBaseLoader(
            web_paths=(url,),
            bs_kwargs=dict(
                parse_only=bs4.SoupStrainer()
            ),
        )
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        splits = text_splitter.split_documents(docs)

        # Give each URL its own collection. Without an explicit name every
        # store is created under the same default collection name, so chunks
        # from different URLs can end up being retrieved together.
        url_digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]
        vectorstore = Chroma.from_documents(
            documents=splits,
            embedding=embedding_model,
            collection_name=f"url-{url_digest}",
        )
        return vectorstore

    # Streamlit UI
    st.title("URL Retrieval and Question Answering")

    # Input URL from user; whitespace picked up by copy/paste is dropped so a
    # blank-looking value does not reach the loader.
    url = st.text_input("Enter the URL:").strip()

    if url:
        # Loading and indexing the page can fail (bad URL, network error).
        # Report it in the page, like a failed question below, and end this
        # run instead of letting the exception surface as a raw traceback.
        try:
            vectorstore = load_data(url)
        except Exception as e:
            st.error(f"An error occurred while loading the URL: {e}")
            st.stop()

        question = st.text_input("Enter your question:")

        if question:

            def format_docs(docs):
                """Join the text of the retrieved documents into one context string."""
                return "\n\n".join(doc.page_content for doc in docs)

            # Pulling the prompt uses the user-supplied LangChain key and can
            # fail just like the model call, so both are guarded together.
            try:
                retriever = vectorstore.as_retriever()
                prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)

                # Retrieved context and the raw question feed the prompt; the
                # model output is reduced to a plain string.
                rag_chain = (
                    {"context": retriever | format_docs, "question": RunnablePassthrough()}
                    | prompt
                    | llm
                    | StrOutputParser()
                )
                result = rag_chain.invoke(question)
            except Exception as e:
                st.error(f"An error occurred: {e}")
            else:
                st.write("Answer:", result)