Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,14 +7,10 @@ from bs4 import BeautifulSoup
|
|
7 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
from langchain.chains import RetrievalQAWithSourcesChain
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
-
from langchain.vectorstores import
|
11 |
from langchain_groq import ChatGroq
|
12 |
from dotenv import load_dotenv
|
13 |
from langchain.schema import Document
|
14 |
-
from langchain.vectorstores import FAISS
|
15 |
-
from langchain_openai import OpenAI
|
16 |
-
from langchain.llms import OpenAI
|
17 |
-
|
18 |
|
19 |
load_dotenv() # Load environment variables from .env file
|
20 |
|
@@ -27,7 +23,6 @@ process_url_clicked = st.sidebar.button("Process URLs")
|
|
27 |
file_path = "faiss_store_openai.pkl"
|
28 |
|
29 |
main_placeholder = st.empty()
|
30 |
-
#llm = OpenAI(model_name="gpt-4o", temperature=0.9, max_tokens=500)
|
31 |
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
|
32 |
|
33 |
def fetch_web_content(url):
|
@@ -44,7 +39,7 @@ if process_url_clicked:
|
|
44 |
main_placeholder.text("Data Loading...Started...✅✅✅")
|
45 |
|
46 |
# Fetch content from URLs
|
47 |
-
data = [fetch_web_content(url) for url in urls if url.strip()]
|
48 |
|
49 |
main_placeholder.text("Data Loading...Completed...✅✅✅")
|
50 |
|
@@ -54,14 +49,18 @@ if process_url_clicked:
|
|
54 |
chunk_size=1000
|
55 |
)
|
56 |
main_placeholder.text("Text Splitting...Started...✅✅✅")
|
57 |
-
|
58 |
-
docs =
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
62 |
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
63 |
-
#vectorstore_huggingface = Chroma.from_documents(docs, embedding_model)
|
64 |
vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
|
|
|
65 |
main_placeholder.text("Embedding Vector Started Building...✅✅✅")
|
66 |
time.sleep(2)
|
67 |
|
@@ -75,7 +74,8 @@ if query:
|
|
75 |
if os.path.exists(file_path):
|
76 |
with open(file_path, "rb") as f:
|
77 |
vectorstore = pickle.load(f)
|
78 |
-
|
|
|
79 |
result = chain({"question": query}, return_only_outputs=True)
|
80 |
|
81 |
# Display answer
|
@@ -83,15 +83,11 @@ if query:
|
|
83 |
st.write(result["answer"])
|
84 |
|
85 |
# Display sources, if available
|
86 |
-
sources = result.get("sources", "")
|
87 |
if sources:
|
88 |
st.subheader("Sources:")
|
89 |
sources_list = sources.split("\n")
|
90 |
for source in sources_list:
|
91 |
st.write(source)
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
7 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
from langchain.chains import RetrievalQAWithSourcesChain
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
+
from langchain.vectorstores import FAISS
|
11 |
from langchain_groq import ChatGroq
|
12 |
from dotenv import load_dotenv
|
13 |
from langchain.schema import Document
|
|
|
|
|
|
|
|
|
14 |
|
15 |
load_dotenv() # Load environment variables from .env file
|
16 |
|
|
|
23 |
file_path = "faiss_store_openai.pkl"
|
24 |
|
25 |
main_placeholder = st.empty()
|
|
|
26 |
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
|
27 |
|
28 |
def fetch_web_content(url):
|
|
|
39 |
main_placeholder.text("Data Loading...Started...✅✅✅")
|
40 |
|
41 |
# Fetch content from URLs
|
42 |
+
data = [(url, fetch_web_content(url)) for url in urls if url.strip()]
|
43 |
|
44 |
main_placeholder.text("Data Loading...Completed...✅✅✅")
|
45 |
|
|
|
49 |
chunk_size=1000
|
50 |
)
|
51 |
main_placeholder.text("Text Splitting...Started...✅✅✅")
|
52 |
+
|
53 |
+
docs = []
|
54 |
+
for url, text in data:
|
55 |
+
split_docs = text_splitter.split_text(text)
|
56 |
+
docs.extend([Document(page_content=chunk, metadata={"source": url}) for chunk in split_docs])
|
57 |
+
|
58 |
+
main_placeholder.text("Text Splitting...Completed...✅✅✅")
|
59 |
+
|
60 |
+
# Create embeddings and save to FAISS vector store
|
61 |
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
62 |
vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
|
63 |
+
|
64 |
main_placeholder.text("Embedding Vector Started Building...✅✅✅")
|
65 |
time.sleep(2)
|
66 |
|
|
|
74 |
if os.path.exists(file_path):
|
75 |
with open(file_path, "rb") as f:
|
76 |
vectorstore = pickle.load(f)
|
77 |
+
retriever = vectorstore.as_retriever()
|
78 |
+
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
|
79 |
result = chain({"question": query}, return_only_outputs=True)
|
80 |
|
81 |
# Display answer
|
|
|
83 |
st.write(result["answer"])
|
84 |
|
85 |
# Display sources, if available
|
86 |
+
sources = result.get("sources", "").strip()
|
87 |
if sources:
|
88 |
st.subheader("Sources:")
|
89 |
sources_list = sources.split("\n")
|
90 |
for source in sources_list:
|
91 |
st.write(source)
|
92 |
+
else:
|
93 |
+
st.write("No sources found.")
|
|
|
|
|
|
|
|