# File size: 5,663 bytes
# Revision: 4a118a7
import streamlit as st
import re
from langchain_groq import ChatGroq
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import bs4
import torch
import os
# Inject CSS that paints the sidebar with a four-colour diagonal gradient.
sidebar_bg_style = "\n".join(
    [
        "",
        "<style>",
        '[data-testid="stSidebar"] {',
        "background: linear-gradient(135deg, #ffafbd, #ffc3a0, #2193b0, #6dd5ed);",
        "}",
        "</style>",
        "",
    ]
)
# unsafe_allow_html is required so the <style> tag is emitted as markup.
st.markdown(sidebar_bg_style, unsafe_allow_html=True)
# Sidebar: settings panel holding the blog URL (and, further down, the API keys)
st.sidebar.title("Settings")


def _is_valid_url(url):
    """Return True when *url* is an absolute http(s) URL with a host.

    Whitespace anywhere in the value is rejected, and the whole string is
    parsed rather than only its prefix, so trailing garbage after the host is
    not silently accepted.
    """
    from urllib.parse import urlparse

    if not url or any(ch.isspace() for ch in url):
        return False
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse raises ValueError on malformed input such as a broken
        # bracketed IPv6 host.
        return False
    return parts.scheme in ("http", "https") and bool(parts.hostname)


# Surrounding whitespace is stripped because pasted URLs often carry it.
url_input = st.sidebar.text_input(
    "Enter Blog Post URL",
    placeholder="e.g., https://example.com/blog",
    help="Paste the full URL of the blog post you want to retrieve data from",
).strip()

# Give immediate feedback on the entered URL; nothing is shown while it is empty.
if url_input:
    if _is_valid_url(url_input):
        st.sidebar.markdown(
            '<p style="color:green; font-weight:bold;">URL is correctly entered</p>',
            unsafe_allow_html=True,
        )
    else:
        st.sidebar.markdown(
            '<p style="color:red; font-weight:bold;">Invalid URL, please enter a valid one</p>',
            unsafe_allow_html=True,
        )
# Sidebar: masked inputs for the two API keys
api_key_1 = st.sidebar.text_input(
    "Enter LangChain API Key",
    type="password",
    placeholder="Enter your LangChain API Key",
    help="Please enter a valid LangChain API key here",
)
api_key_2 = st.sidebar.text_input(
    "Enter Groq API Key",
    type="password",
    placeholder="Enter your Groq API Key",
    help="Please enter your Groq API key here",
)

# The keys are exported to the environment only when the button is pressed
# and both fields are filled in; otherwise a red hint is shown.
# NOTE(review): os.environ is process-wide, so the keys are not scoped to a
# single browser session.
if st.sidebar.button("Submit API Keys"):
    if not (api_key_1 and api_key_2):
        st.sidebar.markdown(
            '<p style="color:red; font-weight:bold;">Please fill in both API keys</p>',
            unsafe_allow_html=True,
        )
    else:
        os.environ.update(
            {"LANGCHAIN_API_KEY": api_key_1, "GROQ_API_KEY": api_key_2}
        )
        st.sidebar.markdown(
            '<p style="color:green; font-weight:bold;">Both API keys are entered</p>',
            unsafe_allow_html=True,
        )
# Main area: inject a gradient background rule for <body>, then the page title.
main_bg_style = "\n".join(
    [
        "",
        "<style>",
        "body {",
        "background: linear-gradient(135deg, #ff9a9e, #fad0c4, #fbc2eb, #a18cd1);",
        "}",
        "</style>",
        "",
    ]
)
st.markdown(main_bg_style, unsafe_allow_html=True)

# Page heading, rendered as raw HTML so the inline colour styling applies.
st.markdown(
    '<h1 style="color:#4CAF50; font-weight:bold;">🤖 Chatbot with URL-based Document Retrieval</h1>',
    unsafe_allow_html=True,
)
# Free-text question the user wants answered from the blog post.
query = st.text_input(
    "Ask a question based on the blog post",
    placeholder="Type your question here...",
    help="Enter a question related to the content of the blog post",
)

# Create the per-session list of (question, answer) pairs on first run only,
# so earlier exchanges survive Streamlit's script reruns.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
# Retrieval-augmented answering: scrape the post, index it, ask the LLM.


class SentenceTransformerEmbedding:
    """Adapter around a SentenceTransformer model.

    Provides the ``embed_documents`` / ``embed_query`` pair so an instance can
    be passed to ``Chroma.from_documents`` as its embedding object.
    """

    def __init__(self, model_name):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Return one embedding (a list of floats) per entry in *texts*."""
        embeddings = self.model.encode(texts, convert_to_tensor=True)
        if isinstance(embeddings, torch.Tensor):
            # Convert the tensor to plain nested Python lists.
            return embeddings.cpu().detach().numpy().tolist()
        return embeddings

    def embed_query(self, query):
        """Return the embedding (a list of floats) for a single query string."""
        embedding = self.model.encode([query], convert_to_tensor=True)
        if isinstance(embedding, torch.Tensor):
            # A one-element batch was encoded; unwrap its only row.
            return embedding.cpu().detach().numpy().tolist()[0]
        return embedding[0]


def format_docs(docs):
    """Join the page content of the retrieved documents with blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


@st.cache_resource(show_spinner=False)
def _load_embedding_model(model_name="all-MiniLM-L6-v2"):
    """Load the embedding model once and reuse it, instead of once per query."""
    return SentenceTransformerEmbedding(model_name)


@st.cache_resource(show_spinner=False)
def _build_retriever(url):
    """Scrape *url*, chunk and embed its text, and return a Chroma retriever.

    The result is cached per URL, so follow-up questions about the same post
    do not download and re-embed the page again.

    Raises:
        ValueError: if no text chunks could be produced from the page.
    """
    import uuid

    loader = WebBaseLoader(
        web_paths=(url,),
        # No filter criteria are given to the strainer; narrow it here if only
        # part of the page should be indexed.
        bs_kwargs=dict(parse_only=bs4.SoupStrainer()),
    )
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    if not splits:
        raise ValueError("No text content could be extracted from the URL.")

    # NOTE(review): Chroma's default in-memory collection appears to be shared
    # between calls within one process, so indexing into it on every query
    # would pile up duplicate chunks and mix content from different URLs.
    # A unique collection per build avoids that - confirm against the
    # installed chromadb version.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=_load_embedding_model(),
        collection_name=f"blog-{uuid.uuid4().hex}",
    )
    return vectorstore.as_retriever()


# Submit button for chat
if st.button("Submit Query"):
    question = query.strip()
    target_url = url_input.strip()

    if not question:
        st.warning("Please enter a question before submitting.")
    elif not target_url:
        st.warning("Please enter a blog post URL in the sidebar.")
    elif not os.environ.get("GROQ_API_KEY"):
        # ChatGroq is created without an explicit key below, so it depends on
        # this environment variable being set.
        st.error("Please submit your Groq API key in the sidebar first.")
    else:
        try:
            with st.spinner("Retrieving the post and generating an answer..."):
                retriever = _build_retriever(target_url)
                prompt = hub.pull("rlm/rag-prompt")
                rag_chain = (
                    {"context": retriever | format_docs, "question": RunnablePassthrough()}
                    | prompt
                    | ChatGroq(model="llama3-8b-8192")
                    | StrOutputParser()
                )
                result = rag_chain.invoke(question)
        except Exception as exc:  # UI boundary: report the failure, don't crash
            st.error(f"Could not answer the question: {exc}")
        else:
            # Only successful exchanges are added to the chat history.
            st.session_state['chat_history'].append((question, result))
# Render every stored exchange in the order it was recorded.
for asked, replied in st.session_state['chat_history']:
    st.write(f"**User:** {asked}")
    st.write(f"**Bot:** {replied}")