Spaces:

AreesaAshfaq
/

BlogRetrievalQA

Sleeping

App Files Community

AreesaAshfaq committed on Aug 28, 2024

Commit

46018ec

verified ·

1 Parent(s): 98cc5ee

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -40

app.py CHANGED Viewed

@@ -8,8 +8,8 @@ from langchain_core.runnables import RunnablePassthrough
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 import bs4
 import torch
-import getpass
 # APP Title
 st.title("Blog Retrieval and Question Answering")
@@ -21,12 +21,11 @@ api_key_Groq = st.text_input("Enter your Groq_API_KEY", type="password")
 # Check if both API keys have been provided
 if not api_key_langchain or not api_key_Groq:
-    st.write("Please enter both API keys if you want to access this APP.")
 else:
     st.write("Both API keys are set.")
     # Initialize the LLM with the provided Groq API key
-    from langchain_groq import ChatGroq
     llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)
     # Define the embedding class
@@ -49,45 +48,55 @@ else:
     # Initialize the embedding class
     embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
-    # Load, chunk, and index the contents of the blog
-    def load_data():
-        loader = WebBaseLoader(
-            web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
-            bs_kwargs=dict(
-                parse_only=bs4.SoupStrainer(
-                    class_=("post-content", "post-title", "post-header")
-                )
-            ),
-        )
-        docs = loader.load()
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        splits = text_splitter.split_documents(docs)
-        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
-        return vectorstore
-    vectorstore = load_data()
-    # Streamlit UI
-    question = st.text_input("Enter your question:")
-    if question:
-        retriever = vectorstore.as_retriever()
-        prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)
-        def format_docs(docs):
-            return "\n\n".join(doc.page_content for doc in docs)
-        rag_chain = (
-            {"context": retriever | format_docs, "question": RunnablePassthrough()}
-            | prompt
-            | llm  # Replace with your LLM or appropriate function if needed
-            | StrOutputParser()
-        )
-        # Example invocation
-        try:
-            result = rag_chain.invoke(question)
-            st.write("Answer:", result)
-        except Exception as e:
-            st.error(f"An error occurred: {e}")

 from langchain_text_splitters import RecursiveCharacterTextSplitter
 import bs4
 import torch
+from langchain_groq import ChatGroq
 # APP Title
 st.title("Blog Retrieval and Question Answering")
 # Check if both API keys have been provided
 if not api_key_langchain or not api_key_Groq:
+    st.write("Please enter both API keys to access this APP.")
 else:
     st.write("Both API keys are set.")
     # Initialize the LLM with the provided Groq API key
     llm = ChatGroq(model="llama3-8b-8192", groq_api_key=api_key_Groq)
     # Define the embedding class
     # Initialize the embedding class
     embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
+    # Streamlit UI for blog URL input
+    blog_url = st.text_input("Enter the URL of the blog to retrieve:")
+    # Load, chunk, and index the contents of the blog
+    def load_data(url):
+        try:
+            loader = WebBaseLoader(
+                web_paths=(url,),
+                bs_kwargs=dict(
+                    parse_only=bs4.SoupStrainer(
+                        class_=("post-content", "post-title", "post-header")
+                    )
+                ),
+            )
+            docs = loader.load()
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+            splits = text_splitter.split_documents(docs)
+            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
+            return vectorstore
+        except Exception as e:
+            st.error(f"An error occurred while loading the blog: {e}")
+            return None
+    # Load the data if a URL is provided
+    if blog_url:
+        vectorstore = load_data(blog_url)
+        if vectorstore:
+            # Streamlit UI for question input
+            question = st.text_input("Enter your question:")
+            if question:
+                retriever = vectorstore.as_retriever()
+                prompt = hub.pull("rlm/rag-prompt", api_key=api_key_langchain)
+                def format_docs(docs):
+                    return "\n\n".join(doc.page_content for doc in docs)
+                rag_chain = (
+                    {"context": retriever | format_docs, "question": RunnablePassthrough()}
+                    | prompt
+                    | llm
+                    | StrOutputParser()
+                )
+                # Example invocation
+                try:
+                    result = rag_chain.invoke(question)
+                    st.write("Answer:", result)
+                except Exception as e:
+                    st.error(f"An error occurred while generating the answer: {e}")
+        else:
+            st.write("Failed to load the blog content. Please check the URL and try again.")