Deaksh committed
Commit a78722c · verified · 1 Parent(s): 0bf671e

Upload main.py

Files changed (1)
  1. main.py +73 -0
main.py ADDED
import os
import streamlit as st
import pickle
import time
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS

from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env (especially GROQ_API_KEY)

st.title("RockyBot: News Research Tool 📈")
st.sidebar.title("News Article URLs")

urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    if url:  # ignore blank inputs so only real URLs reach the loader
        urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
file_path = "faiss_store_openai.pkl"

main_placeholder = st.empty()
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
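# ChatGroq picks up the Groq API key from the GROQ_API_KEY environment
# variable (loaded above via load_dotenv) when no key is passed explicitly.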

if process_url_clicked:
    # load data
    loader = UnstructuredURLLoader(urls=urls)
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()
    # split data
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
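    # The recursive splitter tries each separator in order, so chunks break on
    # paragraphs first and only fall back to sentences/clauses to stay under
    # chunk_size (measured in characters by default).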
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)
    # create embeddings and save them to a FAISS index
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)

    # Save the FAISS index to a pickle file
    with open(file_path, "wb") as f:
        pickle.dump(vectorstore_huggingface, f)
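    # Note: newer LangChain releases may refuse to pickle a FAISS store; if so,
    # the supported persistence path is roughly (a sketch, assuming the
    # langchain_community FAISS API):
    #   vectorstore_huggingface.save_local("faiss_store")
    #   vectorstore = FAISS.load_local("faiss_store", embedding_model,
    #                                  allow_dangerous_deserialization=True)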

query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
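        # The chain embeds the question, pulls the most similar chunks from
        # FAISS, and asks the LLM to answer using them, returning the answer
        # together with the source URLs it drew on.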
        result = chain({"question": query}, return_only_outputs=True)
        # result is a dictionary of the form --> {"answer": "...", "sources": "..."}
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            sources_list = sources.split("\n")  # split the sources string by newline
            for source in sources_list:
                st.write(source)
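
# To try the app locally (assuming Streamlit and the dependencies above are
# installed): streamlit run main.py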