Spaces:

Deaksh
/

research-tool

Sleeping

App Files Community

Deaksh committed on Feb 19

Commit

4d6a1a6

verified ·

1 Parent(s): fd90935

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -46

app.py CHANGED Viewed

@@ -2,95 +2,82 @@ import os
 import streamlit as st
 import pickle
 import time
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain import OpenAI
 from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import UnstructuredURLLoader
-from langchain_groq import ChatGroq
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import FAISS
 from langchain.vectorstores import Chroma
-import requests
-from bs4 import BeautifulSoup
 from dotenv import load_dotenv
-load_dotenv()  # take environment variables from .env (especially openai api key)
 st.title("RockyBot: News Research Tool 📈")
 st.sidebar.title("News Article URLs")
-urls = []
-for i in range(3):
-    url = st.sidebar.text_input(f"URL {i+1}")
-    urls.append(url)
 process_url_clicked = st.sidebar.button("Process URLs")
 file_path = "faiss_store_openai.pkl"
 main_placeholder = st.empty()
 llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
-if process_url_clicked:
-    # load data
-    #loader = UnstructuredURLLoader(urls=urls)
-    #main_placeholder.text("Data Loading...Started...✅✅✅")
-    #data = loader.load()
-    def fetch_web_content(url):
-      try:
         response = requests.get(url, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, "html.parser")
         return soup.get_text()
-      except Exception as e:
         return f"Error fetching {url}: {str(e)}"
-# Your list of URLs
-url = url
-# Display status message
-main_placeholder.text("Data Loading...Started...✅✅✅")
-# Fetch content
-data = [fetch_web_content(url) for url in urls if url.strip()]
-# Display completion message
-main_placeholder.text("Data Loading...Completed...✅✅✅")
-    # split data
-     text_splitter = RecursiveCharacterTextSplitter(
         separators=['\n\n', '\n', '.', ','],
         chunk_size=1000
-     )
-    main_placeholder.text("Text Splitter...Started...✅✅✅")
     docs = text_splitter.split_documents(data)
-    # create embeddings and save it to FAISS index
     embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    #vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
     vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)
     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
     time.sleep(2)
-    # Save the FAISS index to a pickle file
     with open(file_path, "wb") as f:
         pickle.dump(vectorstore_huggingface, f)
-query = main_placeholder.text_input("Question: ")
 if query:
     if os.path.exists(file_path):
         with open(file_path, "rb") as f:
             vectorstore = pickle.load(f)
             chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
             result = chain({"question": query}, return_only_outputs=True)
-            # result will be a dictionary of this format --> {"answer": "", "sources": [] }
             st.header("Answer")
             st.write(result["answer"])
             # Display sources, if available
             sources = result.get("sources", "")
             if sources:
                 st.subheader("Sources:")
-                sources_list = sources.split("\n")  # Split the sources by newline
                 for source in sources_list:
                     st.write(source)

 import streamlit as st
 import pickle
 import time
+import requests
+from bs4 import BeautifulSoup
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
+from langchain_groq import ChatGroq
 from dotenv import load_dotenv
# Load environment variables from a .env file before any client is created.
load_dotenv()

# Page heading and sidebar heading.
st.title("RockyBot: News Research Tool 📈")
st.sidebar.title("News Article URLs")

# Three sidebar text boxes ("URL 1" .. "URL 3"); unfilled boxes yield "".
urls = []
for i in range(3):
    urls.append(st.sidebar.text_input(f"URL {i+1}"))

# True only on the rerun triggered by pressing the sidebar button.
process_url_clicked = st.sidebar.button("Process URLs")

# Pickle file shared by the indexing step and the question-answering step.
# The name still mentions FAISS/OpenAI although the store written to it is
# built with Chroma and HuggingFace embeddings.
file_path = "faiss_store_openai.pkl"

# Single-slot placeholder in the main area, reused for progress messages.
main_placeholder = st.empty()

# Chat model used to answer questions over the retrieved chunks.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.9,
    max_tokens=500,
)
def fetch_web_content(url):
    """Fetch a web page and return its text content.

    Downloads ``url`` with a 10-second timeout, parses the response body with
    BeautifulSoup's ``html.parser`` and returns the extracted text.

    Args:
        url: Address of the page to download.

    Returns:
        The page text on success. If the request fails (connection problem,
        timeout, invalid URL, HTTP error status) nothing is raised; instead a
        string of the form ``"Error fetching <url>: <reason>"`` is returned,
        so callers should check for that prefix before using the result as
        article content.
    """
    try:
        response = requests.get(url, timeout=10)
        # Turn 4xx/5xx responses into exceptions so an error page is not
        # returned as if it were the article.
        response.raise_for_status()
    except requests.RequestException as e:
        # Only request failures are reported as a message; unrelated bugs are
        # left to propagate instead of being disguised as page text.
        return f"Error fetching {url}: {str(e)}"
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text()
if process_url_clicked:
    main_placeholder.text("Data Loading...Started...✅✅✅")
    # Fetch content from URLs, keeping each text paired with the URL it came
    # from so the QA chain can cite it as a source.
    data = []
    metadatas = []
    for url in urls:
        url = url.strip()
        if not url:
            continue
        content = fetch_web_content(url)
        # fetch_web_content reports failures as an "Error fetching <url>: ..."
        # string; surface those to the user instead of indexing them as text.
        if content.startswith(f"Error fetching {url}:"):
            st.warning(content)
            continue
        if not content.strip():
            st.warning(f"No text found at {url}")
            continue
        data.append(content)
        metadatas.append({"source": url})
    main_placeholder.text("Data Loading...Completed...✅✅✅")

    if not data:
        # Nothing usable was fetched; building an empty vector store would fail.
        main_placeholder.text("No article text could be loaded. Check the URLs and try again.")
    else:
        # Split data into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            separators=['\n\n', '\n', '.', ','],
            chunk_size=1000
        )
        main_placeholder.text("Text Splitting...Started...✅✅✅")
        # create_documents accepts raw strings (split_documents requires
        # Document objects) and copies each metadata dict onto its chunks.
        docs = text_splitter.create_documents(data, metadatas=metadatas)

        # Create embeddings and save to Chroma vector store. The status
        # message is shown before the build so it is visible while it runs.
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")
        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)

        # Save the vector store to a pickle file
        # NOTE(review): pickling a Chroma store may not round-trip on every
        # Chroma version; its own on-disk persistence is presumably the safer
        # option, but the query step reads this pickle, so it is kept here.
        with open(file_path, "wb") as f:
            pickle.dump(vectorstore_huggingface, f)
# User query input
query = st.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # acceptable because the file is written by this app itself.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        # The file is closed before the (slow) retrieval + LLM call.
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain({"question": query}, return_only_outputs=True)

        # Display answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available (one per line, blank lines skipped)
        sources = result.get("sources", "")
        sources_list = [source for source in sources.split("\n") if source.strip()]
        if sources_list:
            st.subheader("Sources:")
            for source in sources_list:
                st.write(source)
    else:
        # Without this the app silently ignored questions asked before any
        # URLs had been processed.
        st.warning("No processed articles found. Enter URLs in the sidebar and click \"Process URLs\" first.")