Deaksh committed on
Commit
4d6a1a6
·
verified ·
1 Parent(s): fd90935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -46
app.py CHANGED
@@ -2,95 +2,82 @@ import os
2
  import streamlit as st
3
  import pickle
4
  import time
 
 
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
- from langchain import OpenAI
7
  from langchain.chains import RetrievalQAWithSourcesChain
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
- from langchain.document_loaders import UnstructuredURLLoader
10
- from langchain_groq import ChatGroq
11
- from langchain.embeddings import OpenAIEmbeddings
12
- from langchain.vectorstores import FAISS
13
  from langchain.vectorstores import Chroma
14
- import requests
15
- from bs4 import BeautifulSoup
16
-
17
-
18
  from dotenv import load_dotenv
19
- load_dotenv() # take environment variables from .env (especially openai api key)
 
20
 
21
  st.title("RockyBot: News Research Tool 📈")
22
  st.sidebar.title("News Article URLs")
23
 
24
- urls = []
25
- for i in range(3):
26
- url = st.sidebar.text_input(f"URL {i+1}")
27
- urls.append(url)
28
-
29
  process_url_clicked = st.sidebar.button("Process URLs")
30
  file_path = "faiss_store_openai.pkl"
31
 
32
  main_placeholder = st.empty()
33
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
34
 
35
- if process_url_clicked:
36
- # load data
37
- #loader = UnstructuredURLLoader(urls=urls)
38
- #main_placeholder.text("Data Loading...Started...✅✅✅")
39
- #data = loader.load()
40
- def fetch_web_content(url):
41
- try:
42
  response = requests.get(url, timeout=10)
43
  response.raise_for_status()
44
  soup = BeautifulSoup(response.text, "html.parser")
45
  return soup.get_text()
46
- except Exception as e:
47
  return f"Error fetching {url}: {str(e)}"
48
 
49
- # Your list of URLs
50
- url = url
51
-
52
- # Display status message
53
- main_placeholder.text("Data Loading...Started...✅✅✅")
54
-
55
- # Fetch content
56
- data = [fetch_web_content(url) for url in urls if url.strip()]
57
-
58
- # Display completion message
59
- main_placeholder.text("Data Loading...Completed...✅✅✅")
60
- # split data
61
- text_splitter = RecursiveCharacterTextSplitter(
62
  separators=['\n\n', '\n', '.', ','],
63
  chunk_size=1000
64
- )
65
- main_placeholder.text("Text Splitter...Started...✅✅✅")
66
  docs = text_splitter.split_documents(data)
67
- # create embeddings and save it to FAISS index
 
68
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
69
- #vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
70
  vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)
71
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
72
  time.sleep(2)
73
-
74
- # Save the FAISS index to a pickle file
75
  with open(file_path, "wb") as f:
76
  pickle.dump(vectorstore_huggingface, f)
77
 
78
- query = main_placeholder.text_input("Question: ")
 
79
  if query:
80
  if os.path.exists(file_path):
81
  with open(file_path, "rb") as f:
82
  vectorstore = pickle.load(f)
83
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
84
  result = chain({"question": query}, return_only_outputs=True)
85
- # result will be a dictionary of this format --> {"answer": "", "sources": [] }
 
86
  st.header("Answer")
87
  st.write(result["answer"])
88
-
89
  # Display sources, if available
90
  sources = result.get("sources", "")
91
  if sources:
92
  st.subheader("Sources:")
93
- sources_list = sources.split("\n") # Split the sources by newline
94
  for source in sources_list:
95
  st.write(source)
96
 
 
2
  import streamlit as st
3
  import pickle
4
  import time
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
8
  from langchain.chains import RetrievalQAWithSourcesChain
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
 
10
  from langchain.vectorstores import Chroma
11
+ from langchain_groq import ChatGroq
 
 
 
12
  from dotenv import load_dotenv
13
+
14
+ load_dotenv() # Load environment variables from .env file
15
 
16
  st.title("RockyBot: News Research Tool 📈")
17
  st.sidebar.title("News Article URLs")
18
 
19
+ # Collect URLs from user input
20
+ urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(3)]
 
 
 
21
  process_url_clicked = st.sidebar.button("Process URLs")
22
  file_path = "faiss_store_openai.pkl"
23
 
24
  main_placeholder = st.empty()
25
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
26
 
27
+ def fetch_web_content(url):
28
+ """Fetches text content from a given URL using BeautifulSoup."""
29
+ try:
 
 
 
 
30
  response = requests.get(url, timeout=10)
31
  response.raise_for_status()
32
  soup = BeautifulSoup(response.text, "html.parser")
33
  return soup.get_text()
34
+ except Exception as e:
35
  return f"Error fetching {url}: {str(e)}"
36
 
37
+ if process_url_clicked:
38
+ main_placeholder.text("Data Loading...Started...✅✅✅")
39
+
40
+ # Fetch content from URLs
41
+ data = [fetch_web_content(url) for url in urls if url.strip()]
42
+
43
+ main_placeholder.text("Data Loading...Completed...✅✅✅")
44
+
45
+ # Split data into chunks
46
+ text_splitter = RecursiveCharacterTextSplitter(
 
 
 
47
  separators=['\n\n', '\n', '.', ','],
48
  chunk_size=1000
49
+ )
50
+ main_placeholder.text("Text Splitting...Started...✅✅✅")
51
  docs = text_splitter.split_documents(data)
52
+
53
+ # Create embeddings and save to Chroma vector store
54
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
55
  vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)
56
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
57
  time.sleep(2)
58
+
59
+ # Save the vector store to a pickle file
60
  with open(file_path, "wb") as f:
61
  pickle.dump(vectorstore_huggingface, f)
62
 
63
+ # User query input
64
+ query = st.text_input("Question: ")
65
  if query:
66
  if os.path.exists(file_path):
67
  with open(file_path, "rb") as f:
68
  vectorstore = pickle.load(f)
69
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
70
  result = chain({"question": query}, return_only_outputs=True)
71
+
72
+ # Display answer
73
  st.header("Answer")
74
  st.write(result["answer"])
75
+
76
  # Display sources, if available
77
  sources = result.get("sources", "")
78
  if sources:
79
  st.subheader("Sources:")
80
+ sources_list = sources.split("\n")
81
  for source in sources_list:
82
  st.write(source)
83