Deaksh committed on
Commit
a471490
·
verified ·
1 Parent(s): bcc4e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -3,13 +3,12 @@ import streamlit as st
3
  import pickle
4
  import time
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
- from langchain import OpenAI
7
  from langchain.chains import RetrievalQAWithSourcesChain
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.document_loaders import UnstructuredURLLoader
10
  from langchain_groq import ChatGroq
11
- from langchain.embeddings import OpenAIEmbeddings
12
  from langchain.vectorstores import FAISS
 
13
 
14
  from dotenv import load_dotenv
15
  load_dotenv() # take environment variables from .env (especially openai api key)
@@ -29,35 +28,53 @@ main_placeholder = st.empty()
29
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
30
 
31
  if process_url_clicked:
32
- # load data
33
  loader = UnstructuredURLLoader(urls=urls)
34
  main_placeholder.text("Data Loading...Started...✅✅✅")
35
  data = loader.load()
36
- # split data
 
37
  text_splitter = RecursiveCharacterTextSplitter(
38
  separators=['\n\n', '\n', '.', ','],
39
  chunk_size=1000
40
  )
41
  main_placeholder.text("Text Splitter...Started...✅✅✅")
42
  docs = text_splitter.split_documents(data)
43
- # create embeddings and save it to FAISS index
 
44
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
45
- vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
46
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
47
- time.sleep(2)
48
-
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Save the FAISS index to a pickle file
50
  with open(file_path, "wb") as f:
51
  pickle.dump(vectorstore_huggingface, f)
 
 
52
 
53
  query = main_placeholder.text_input("Question: ")
54
  if query:
55
  if os.path.exists(file_path):
 
56
  with open(file_path, "rb") as f:
57
  vectorstore = pickle.load(f)
58
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
59
  result = chain({"question": query}, return_only_outputs=True)
60
- # result will be a dictionary of this format --> {"answer": "", "sources": [] }
 
61
  st.header("Answer")
62
  st.write(result["answer"])
63
 
@@ -71,3 +88,4 @@ if query:
71
 
72
 
73
 
 
 
3
  import pickle
4
  import time
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
6
  from langchain.chains import RetrievalQAWithSourcesChain
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.document_loaders import UnstructuredURLLoader
9
  from langchain_groq import ChatGroq
 
10
  from langchain.vectorstores import FAISS
11
+ import numpy as np
12
 
13
  from dotenv import load_dotenv
14
  load_dotenv() # take environment variables from .env (especially openai api key)
 
28
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
29
 
30
  if process_url_clicked:
31
+ # Load data from URLs
32
  loader = UnstructuredURLLoader(urls=urls)
33
  main_placeholder.text("Data Loading...Started...✅✅✅")
34
  data = loader.load()
35
+
36
+ # Split data into chunks
37
  text_splitter = RecursiveCharacterTextSplitter(
38
  separators=['\n\n', '\n', '.', ','],
39
  chunk_size=1000
40
  )
41
  main_placeholder.text("Text Splitter...Started...✅✅✅")
42
  docs = text_splitter.split_documents(data)
43
+
44
+ # Create embeddings using HuggingFaceEmbeddings
45
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
46
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
47
+
48
+ # Generate embeddings
49
+ embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
50
+
51
+ # Convert embeddings to numpy array (needed by FAISS)
52
+ embeddings_np = np.array(embeddings).astype(np.float32)
53
+
54
+ # Create FAISS index
55
+ dimension = len(embeddings[0]) # Embedding vector dimension
56
+ index = FAISS(dimension)
57
+ index.add(embeddings_np) # Add embeddings to FAISS index
58
+
59
+ # Wrap FAISS index using LangChain FAISS wrapper
60
+ vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
61
+
62
  # Save the FAISS index to a pickle file
63
  with open(file_path, "wb") as f:
64
  pickle.dump(vectorstore_huggingface, f)
65
+
66
+ time.sleep(2)
67
 
68
  query = main_placeholder.text_input("Question: ")
69
  if query:
70
  if os.path.exists(file_path):
71
+ # Load the FAISS index from the pickle file
72
  with open(file_path, "rb") as f:
73
  vectorstore = pickle.load(f)
74
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
75
  result = chain({"question": query}, return_only_outputs=True)
76
+
77
+ # Display the answer
78
  st.header("Answer")
79
  st.write(result["answer"])
80
 
 
88
 
89
 
90
 
91
+