AreesaAshfaq committed on
Commit
46018ec
·
verified ·
1 Parent(s): 98cc5ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -40
app.py CHANGED
@@ -8,8 +8,8 @@ from langchain_core.runnables import RunnablePassthrough
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  import bs4
10
  import torch
11
- import getpass
12
-
13
  # APP Title
14
  st.title("Blog Retrieval and Question Answering")
15
 
@@ -21,12 +21,11 @@ api_key_Groq = st.text_input("Enter your Groq_API_KEY", type="password")
21
 
22
  # Check if both API keys have been provided
23
  if not api_key_langchain or not api_key_Groq:
24
- st.write("Please enter both API keys if you want to access this APP.")
25
  else:
26
  st.write("Both API keys are set.")
27
 
28
  # Initialize the LLM with the provided Groq API key
29
- from langchain_groq import ChatGroq
30
  llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)
31
 
32
  # Define the embedding class
@@ -49,45 +48,55 @@ else:
49
  # Initialize the embedding class
50
  embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
51
 
52
- # Load, chunk, and index the contents of the blog
53
- def load_data():
54
- loader = WebBaseLoader(
55
- web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
56
- bs_kwargs=dict(
57
- parse_only=bs4.SoupStrainer(
58
- class_=("post-content", "post-title", "post-header")
59
- )
60
- ),
61
- )
62
- docs = loader.load()
63
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
64
- splits = text_splitter.split_documents(docs)
65
- vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
66
- return vectorstore
67
 
68
- vectorstore = load_data()
69
-
70
- # Streamlit UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- question = st.text_input("Enter your question:")
 
 
 
 
 
73
 
74
- if question:
75
- retriever = vectorstore.as_retriever()
76
- prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)
77
 
78
- def format_docs(docs):
79
- return "\n\n".join(doc.page_content for doc in docs)
80
 
81
- rag_chain = (
82
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
83
- | prompt
84
- | llm # Replace with your LLM or appropriate function if needed
85
- | StrOutputParser()
86
- )
87
 
88
- # Example invocation
89
- try:
90
- result = rag_chain.invoke(question)
91
- st.write("Answer:", result)
92
- except Exception as e:
93
- st.error(f"An error occurred: {e}")
 
 
 
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  import bs4
10
  import torch
11
+ from langchain_groq import ChatGroq
12
+
13
  # APP Title
14
  st.title("Blog Retrieval and Question Answering")
15
 
 
21
 
22
  # Check if both API keys have been provided
23
  if not api_key_langchain or not api_key_Groq:
24
+ st.write("Please enter both API keys to access this APP.")
25
  else:
26
  st.write("Both API keys are set.")
27
 
28
  # Initialize the LLM with the provided Groq API key
 
29
  llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)
30
 
31
  # Define the embedding class
 
48
  # Initialize the embedding class
49
  embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
50
 
51
+ # Streamlit UI for blog URL input
52
+ blog_url = st.text_input("Enter the URL of the blog to retrieve:")
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
# Load, chunk, and index the contents of the blog.
# Cached so Streamlit reruns (e.g. each keystroke in the question box)
# don't re-download and re-embed the same URL every time.
@st.cache_resource(show_spinner="Loading and indexing the blog...")
def load_data(url):
    """Fetch the blog at *url*, split it into chunks, and build a Chroma vector store.

    Args:
        url: Web address of the blog post to index.

    Returns:
        A Chroma vector store over the blog's chunks, or None if fetching,
        parsing, or indexing failed (the error is shown in the Streamlit UI
        instead of raising).
    """
    try:
        loader = WebBaseLoader(
            web_paths=(url,),
            bs_kwargs=dict(
                # Only parse the post body/title/header elements; everything
                # else (nav, sidebars, footers) is skipped at parse time.
                parse_only=bs4.SoupStrainer(
                    class_=("post-content", "post-title", "post-header")
                )
            ),
        )
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(docs)
        # Guard: the hard-coded SoupStrainer classes may match nothing on an
        # arbitrary URL, and Chroma.from_documents raises on an empty list.
        if not splits:
            st.error("No readable content was found at that URL. Please check the link and try again.")
            return None
        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
        return vectorstore
    except Exception as e:
        # Best-effort: surface the failure in the UI rather than crashing the app.
        st.error(f"An error occurred while loading the blog: {e}")
        return None
73
 
74
# Load the data if a URL is provided
if blog_url:
    store = load_data(blog_url)
    if not store:
        st.write("Failed to load the blog content. Please check the URL and try again.")
    else:
        # Streamlit UI for question input
        user_question = st.text_input("Enter your question:")

        if user_question:
            def format_docs(docs):
                """Concatenate retrieved document chunks into one prompt-ready string."""
                return "\n\n".join(d.page_content for d in docs)

            # Pull the community RAG prompt and wire up the retrieval chain:
            # retrieved context + original question -> prompt -> LLM -> plain text.
            rag_prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)
            chain = (
                {"context": store.as_retriever() | format_docs, "question": RunnablePassthrough()}
                | rag_prompt
                | llm
                | StrOutputParser()
            )

            # Example invocation
            try:
                answer = chain.invoke(user_question)
                st.write("Answer:", answer)
            except Exception as err:
                st.error(f"An error occurred while generating the answer: {err}")