File size: 4,480 Bytes
0a6ed15
 
 
 
 
 
 
 
 
 
46018ec
 
98cc5ee
cf293a5
8483ca7
8c7e036
c469b78
3d0b71c
8c7e036
 
0a6ed15
812e3d9
 
46018ec
8c7e036
812e3d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46018ec
 
812e3d9
46018ec
 
cd87b5d
 
 
 
 
 
 
 
 
 
 
 
46018ec
cd87b5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
812e3d9
46018ec
 
 
 
 
 
812e3d9
46018ec
 
 
f9a1a72
46018ec
 
f9a1a72
46018ec
 
 
 
 
 
812e3d9
46018ec
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
import bs4
import torch
from langchain_groq import ChatGroq

# APP Title
st.title("Blog Retrieval and Question Answering")

# Collect both credentials through masked input fields: the LangChain key is
# used further down when pulling the prompt, the Groq key authenticates the LLM.
api_key_langchain = st.text_input("Enter your LANGCHAIN_API_KEY", type="password")
api_key_Groq = st.text_input("Enter your Groq_API_KEY", type="password")

# Everything after this point is gated on both keys being non-empty
if not (api_key_langchain and api_key_Groq):
    st.write("Please enter both API keys to access this APP.")
else:
    st.write("Both API keys are set.")

    # Chat model served by Groq, configured with the key entered above
    llm = ChatGroq(groq_api_key=api_key_Groq, model="llama3-8b-8192")

    # Define the embedding class
    class SentenceTransformerEmbedding:
        """Expose a SentenceTransformer model via ``embed_documents`` and ``embed_query``.

        Both methods ask the model for tensor output and convert tensors to
        plain Python lists; any non-tensor result is passed through as-is.
        """

        def __init__(self, model_name):
            """Load the sentence-transformers model identified by *model_name*."""
            self.model = SentenceTransformer(model_name)

        def embed_documents(self, texts):
            """Encode *texts* and return one embedding per input text."""
            encoded = self.model.encode(texts, convert_to_tensor=True)
            if not isinstance(encoded, torch.Tensor):
                # Non-tensor output is handed back untouched
                return encoded
            # Tensor -> NumPy array -> nested Python lists
            as_array = encoded.cpu().detach().numpy()
            return as_array.tolist()

        def embed_query(self, query):
            """Encode a single *query* and return its embedding."""
            # The query is encoded as a one-element batch; the first row is the answer
            encoded = self.model.encode([query], convert_to_tensor=True)
            if isinstance(encoded, torch.Tensor):
                encoded = encoded.cpu().detach().numpy().tolist()
            return encoded[0]

    # Build the embedding wrapper around the 'all-MiniLM-L6-v2' model; load_data
    # hands this object to Chroma when it indexes the blog chunks.
    # NOTE(review): this executes every time this branch of the script runs; if the
    # script is re-executed per interaction the model is presumably reloaded each
    # time — confirm and consider caching it.
    embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')

    # Streamlit UI for blog URL input; an empty value skips loading further down
    blog_url = st.text_input("Enter the URL of the blog to retrieve:")

    # Load, chunk, and index the contents of the blog
    def load_data(url):
        """Download a blog page, split it into chunks and index them in Chroma.

        Only elements with the CSS classes ``post-content``, ``post-title`` and
        ``post-header`` are parsed from the page. Progress and failures are
        reported through the Streamlit UI.

        Args:
            url: Address of the blog page to load.

        Returns:
            The Chroma vector store built from the page's chunks, or ``None``
            when nothing was loaded, no chunks were produced, or any step
            raised an exception (an error message is shown in each case).
        """
        # The whole pipeline is guarded: this is the UI boundary, so any failure
        # (network, parsing, embedding, indexing) becomes an on-page error message.
        try:
            loader = WebBaseLoader(
                web_paths=(url,),
                bs_kwargs=dict(
                    parse_only=bs4.SoupStrainer(
                        class_=("post-content", "post-title", "post-header")
                    )
                ),
            )
            docs = loader.load()
            if not docs:
                st.error("No documents were loaded. Please check the URL and try again.")
                return None

            st.write(f"Loaded {len(docs)} documents.")

            # Overlapping chunks so context is not lost at chunk boundaries
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            splits = text_splitter.split_documents(docs)
            if not splits:
                st.error("No document splits were created. Please check the document content.")
                return None

            st.write(f"Created {len(splits)} document splits.")

            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
            # Defensive guard kept from the original flow
            if vectorstore is None:
                st.error("Failed to create the vectorstore.")
                return None

            return vectorstore
        except Exception as e:
            st.error(f"An error occurred while loading the blog: {e}")
            return None


    # Once a URL has been entered, index the blog and offer question answering
    if blog_url:
        vectorstore = load_data(blog_url)
        if not vectorstore:
            st.write("Failed to load the blog content. Please check the URL and try again.")
        else:
            # Streamlit UI for question input
            question = st.text_input("Enter your question:")

            if question:
                def format_docs(docs):
                    """Join the page contents of *docs*, separated by blank lines."""
                    return "\n\n".join([doc.page_content for doc in docs])

                retriever = vectorstore.as_retriever()
                prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)

                # Retrieved context and the raw question feed the prompt, whose
                # output goes through the LLM and is parsed into a plain string.
                chain_inputs = {
                    "context": retriever | format_docs,
                    "question": RunnablePassthrough(),
                }
                rag_chain = chain_inputs | prompt | llm | StrOutputParser()

                # Run the chain; any failure is reported on the page
                try:
                    answer = rag_chain.invoke(question)
                    st.write("Answer:", answer)
                except Exception as e:
                    st.error(f"An error occurred while generating the answer: {e}")