edjdhug3 committed
Commit be1eb04 · Parent(s): 33ad917

Update app.py

Files changed (1): app.py (+81, -0)
app.py CHANGED
@@ -0,0 +1,81 @@
+ import pickle
+ import streamlit as st
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredURLLoader
+ # from langchain.embeddings import OpenAIEmbeddings
+ from langchain.embeddings import FakeEmbeddings
+ from langchain.llms import HuggingFaceHub
+ from langchain.vectorstores import FAISS
+
+ from dotenv import load_dotenv
+ load_dotenv()  # take environment variables from .env (e.g. the Hugging Face Hub API token used by HuggingFaceHub)
+
+ st.title("RockyBot: News Research Tool 📈")
+ st.sidebar.title("News Article URLs")
+
+ urls = []
+ for i in range(3):
+     url = st.sidebar.text_input(f"URL {i+1}")
+     urls.append(url)
+
+ process_url_clicked = st.sidebar.button("Process URLs")
+ file_path = "faiss_store_openai.pkl"
+
+ main_placeholder = st.empty()
+ llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 64})
+
+ @st.cache
+ def process_urls(urls):
+     """Process the given URLs and save the FAISS index to a pickle file."""
+
+     # load data
+     loader = UnstructuredURLLoader(urls=urls)
+
+     # split data
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=['\n\n', '\n', '.', ','],
+         chunk_size=1000
+     )
+     docs = text_splitter.split_documents(loader.load())
+
+     # create embeddings and save them to a FAISS index
+     # (FakeEmbeddings is a placeholder that returns random vectors)
+     embeddings = FakeEmbeddings(size=1352)
+     vectorstore_openai = FAISS.from_documents(docs, embeddings)
+
+     # Save the FAISS index to a pickle file
+     with open(file_path, "wb") as f:
+         pickle.dump(vectorstore_openai, f)
+
+ if process_url_clicked:
+     # st.progress is not a context manager; create a bar and update it directly
+     progress_bar = st.progress(0)
+     process_urls([u for u in urls if u])  # skip empty URL fields
+     progress_bar.progress(100)
+
+ query = main_placeholder.text_input("Question: ")
+ if query:
+     try:
+         with open(file_path, "rb") as f:
+             vectorstore = pickle.load(f)
+         chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
+         result = chain({"question": query}, return_only_outputs=True)
+         # result is a dictionary of the form {"answer": "...", "sources": "..."}
+         st.header("Answer")
+         st.write(result["answer"])
+
+         # Display sources, if available
+         sources = result.get("sources", "")
+         if sources:
+             st.subheader("Sources:")
+             sources_list = sources.split("\n")  # sources come back newline-separated
+             for source in sources_list:
+                 st.write(source)
+     except Exception as e:
+         st.error(e)
+
+ # Streamlit executes this script top to bottom ("streamlit run app.py");
+ # no main() entry point is needed (st.main does not exist).
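
Note on the embeddings: LangChain's FakeEmbeddings returns random vectors, so the FAISS index built above will not retrieve semantically relevant chunks; it only keeps the pipeline runnable without an OpenAI key. Below is a minimal sketch of the same indexing step using LangChain's HuggingFaceEmbeddings wrapper instead, which is not part of this commit; it assumes the sentence-transformers package is installed and the URL is purely illustrative.

import pickle

from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Hypothetical input URL, for illustration only
urls = ["https://example.com/news-article"]

# Same loading and splitting settings as in app.py
docs = RecursiveCharacterTextSplitter(
    separators=['\n\n', '\n', '.', ','],
    chunk_size=1000,
).split_documents(UnstructuredURLLoader(urls=urls).load())

# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings, replacing
# the random 1352-dimensional vectors from FakeEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)

with open("faiss_store_openai.pkl", "wb") as f:
    pickle.dump(vectorstore, f)

As committed, the app is started with "streamlit run app.py", and load_dotenv() is expected to find a HUGGINGFACEHUB_API_TOKEN in a local .env file so the HuggingFaceHub LLM can authenticate.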