Deaksh committed on
Commit
1dc72a3
·
verified ·
1 Parent(s): 8e8caad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -21
app.py CHANGED
@@ -7,14 +7,10 @@ from bs4 import BeautifulSoup
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.chains import RetrievalQAWithSourcesChain
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
- from langchain.vectorstores import Chroma
11
  from langchain_groq import ChatGroq
12
  from dotenv import load_dotenv
13
  from langchain.schema import Document
14
- from langchain.vectorstores import FAISS
15
- from langchain_openai import OpenAI
16
- from langchain.llms import OpenAI
17
-
18
 
19
  load_dotenv() # Load environment variables from .env file
20
 
@@ -27,7 +23,6 @@ process_url_clicked = st.sidebar.button("Process URLs")
27
  file_path = "faiss_store_openai.pkl"
28
 
29
  main_placeholder = st.empty()
30
- #llm = OpenAI(model_name="gpt-4o", temperature=0.9, max_tokens=500)
31
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
32
 
33
  def fetch_web_content(url):
@@ -44,7 +39,7 @@ if process_url_clicked:
44
  main_placeholder.text("Data Loading...Started...βœ…βœ…βœ…")
45
 
46
  # Fetch content from URLs
47
- data = [fetch_web_content(url) for url in urls if url.strip()]
48
 
49
  main_placeholder.text("Data Loading...Completed...βœ…βœ…βœ…")
50
 
@@ -54,14 +49,18 @@ if process_url_clicked:
54
  chunk_size=1000
55
  )
56
  main_placeholder.text("Text Splitting...Started...βœ…βœ…βœ…")
57
- docs = [Document(page_content=text) for text in data]
58
- docs = text_splitter.split_documents(docs)
59
- #docs = text_splitter.split_documents(data)
60
-
61
- # Create embeddings and save to Chroma vector store
 
 
 
 
62
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
63
- #vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)
64
  vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
 
65
  main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
66
  time.sleep(2)
67
 
@@ -75,7 +74,8 @@ if query:
75
  if os.path.exists(file_path):
76
  with open(file_path, "rb") as f:
77
  vectorstore = pickle.load(f)
78
- chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
 
79
  result = chain({"question": query}, return_only_outputs=True)
80
 
81
  # Display answer
@@ -83,15 +83,11 @@ if query:
83
  st.write(result["answer"])
84
 
85
  # Display sources, if available
86
- sources = result.get("sources", "")
87
  if sources:
88
  st.subheader("Sources:")
89
  sources_list = sources.split("\n")
90
  for source in sources_list:
91
  st.write(source)
92
-
93
-
94
-
95
-
96
-
97
-
 
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.chains import RetrievalQAWithSourcesChain
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.vectorstores import FAISS
11
  from langchain_groq import ChatGroq
12
  from dotenv import load_dotenv
13
  from langchain.schema import Document
 
 
 
 
14
 
15
  load_dotenv() # Load environment variables from .env file
16
 
 
23
  file_path = "faiss_store_openai.pkl"
24
 
25
  main_placeholder = st.empty()
 
26
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
27
 
28
  def fetch_web_content(url):
 
39
  main_placeholder.text("Data Loading...Started...βœ…βœ…βœ…")
40
 
41
  # Fetch content from URLs
42
+ data = [(url, fetch_web_content(url)) for url in urls if url.strip()]
43
 
44
  main_placeholder.text("Data Loading...Completed...βœ…βœ…βœ…")
45
 
 
49
  chunk_size=1000
50
  )
51
  main_placeholder.text("Text Splitting...Started...βœ…βœ…βœ…")
52
+
53
+ docs = []
54
+ for url, text in data:
55
+ split_docs = text_splitter.split_text(text)
56
+ docs.extend([Document(page_content=chunk, metadata={"source": url}) for chunk in split_docs])
57
+
58
+ main_placeholder.text("Text Splitting...Completed...βœ…βœ…βœ…")
59
+
60
+ # Create embeddings and save to FAISS vector store
61
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
62
  vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
63
+
64
  main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
65
  time.sleep(2)
66
 
 
74
  if os.path.exists(file_path):
75
  with open(file_path, "rb") as f:
76
  vectorstore = pickle.load(f)
77
+ retriever = vectorstore.as_retriever()
78
+ chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
79
  result = chain({"question": query}, return_only_outputs=True)
80
 
81
  # Display answer
 
83
  st.write(result["answer"])
84
 
85
  # Display sources, if available
86
+ sources = result.get("sources", "").strip()
87
  if sources:
88
  st.subheader("Sources:")
89
  sources_list = sources.split("\n")
90
  for source in sources_list:
91
  st.write(source)
92
+ else:
93
+ st.write("No sources found.")