import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
import bs4
import torch
from langchain_groq import ChatGroq
# Streamlit UI
st.title("Blog Retrieval and Question Answering")
# Prompt the user to enter their Langchain API key
api_key_langchain = st.text_input("Enter your LANGCHAIN_API_KEY", type="password")
# Prompt the user to enter their Groq API key
api_key_Groq = st.text_input("Enter your Groq_API_KEY", type="password")
# Check if both API keys have been provided
if not api_key_langchain or not api_key_Groq:
    st.write("Please enter both API keys to use this app.")
    st.stop()  # Halt here; the rest of the app runs only once both keys are set
st.write("Both API keys are set.")

# Initialize the LLM with the provided Groq API key
llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)
# Define the embedding class
class SentenceTransformerEmbedding:
    def __init__(self, model_name):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        embeddings = self.model.encode(texts, convert_to_tensor=True)
        if isinstance(embeddings, torch.Tensor):
            return embeddings.cpu().detach().numpy().tolist()  # Convert tensor to nested list
        return embeddings

    def embed_query(self, query):
        embedding = self.model.encode([query], convert_to_tensor=True)
        if isinstance(embedding, torch.Tensor):
            return embedding.cpu().detach().numpy().tolist()[0]  # Convert tensor to flat list
        return embedding[0]
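
# Note: LangChain's vector stores only call embed_documents() and embed_query(),
# so this plain wrapper can stand in for a langchain_core Embeddings subclass.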
# Initialize the embedding class
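# all-MiniLM-L6-v2 is a compact SentenceTransformer that maps text to 384-dimensional vectors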
embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
# Load, chunk, and index the contents of the blog
@st.cache_resource  # Cache the index so the blog is fetched and embedded only once per session
def load_data():
    loader = WebBaseLoader(
        web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
        bs_kwargs=dict(
            # Only keep the post title, header, and body from the page
            parse_only=bs4.SoupStrainer(
                class_=("post-content", "post-title", "post-header")
            )
        ),
    )
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
    return vectorstore
vectorstore = load_data()
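# The Chroma index above is ephemeral (no persist_directory is set),
# so it lives only as long as the cached resource.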
question = st.text_input("Enter your question:")
if question:
    retriever = vectorstore.as_retriever()
    # Pull the standard RAG prompt from the LangChain Hub
    prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)

    def format_docs(docs):
        # Join the retrieved chunks into a single context string
        return "\n\n".join(doc.page_content for doc in docs)

    # Build the RAG chain: retrieve context, fill the prompt, call the LLM, parse to text
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    try:
        result = rag_chain.invoke(question)
        st.write("Answer:", result)
    except Exception as e:
        st.error(f"An error occurred: {e}")
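
# To run this app locally (assuming the file is saved as app.py):
#   streamlit run app.py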