edjdhug3 committed on
Commit
6d6b171
·
1 Parent(s): 003ffdc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -49
app.py CHANGED
@@ -6,6 +6,7 @@ from langchain import OpenAI
6
  from langchain.chains import RetrievalQAWithSourcesChain
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.document_loaders import UnstructuredURLLoader
 
9
  from langchain.embeddings import FakeEmbeddings
10
  from langchain.llms import HuggingFaceHub
11
  from langchain.chains import LLMChain
@@ -13,64 +14,65 @@ from langchain.vectorstores import FAISS
13
 
14
  from dotenv import load_dotenv
15
  load_dotenv() # take environment variables from .env (especially openai api key)
16
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
17
 
 
 
18
 
19
- class RockyBot:
20
- def __init__(self, llm):
21
- self.llm = llm
22
- self.vectorstore = None
23
 
24
- def process_urls(self, urls):
25
- """Processes the given URLs and saves the FAISS index to a pickle file."""
26
 
27
- # load data
28
- loader = UnstructuredURLLoader(urls=urls)
29
 
30
- # split data
31
- text_splitter = RecursiveCharacterTextSplitter(
32
- separators=['\n\n', '\n', '.', ','],
33
- chunk_size=1000
34
- )
35
- docs = text_splitter.split_documents(loader.load())
36
 
37
- # create embeddings and save it to FAISS index
38
- embeddings = FakeEmbeddings(size=1352)
39
- self.vectorstore = FAISS.from_documents(docs, embeddings)
40
 
41
- # Save the FAISS index to a pickle file
42
- with open("faiss_store_openai.pkl", "wb") as f:
43
- pickle.dump(self.vectorstore, f)
 
 
 
44
 
45
- def answer_question(self, question):
46
- """Answers the given question using the LLM and retriever."""
 
47
 
48
- chain = RetrievalQAWithSourcesChain.from_llm(llm=self.llm, retriever=self.vectorstore.as_retriever())
49
- result = chain({"question": question}, return_only_outputs=True)
 
50
 
51
- return result["answer"], result.get("sources", "")
52
-
53
-
54
- if __name__ == '__main__':
55
- llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 64})
56
- rockybot = RockyBot(llm)
57
-
58
- # Process URLs if the button is clicked
59
- if st.sidebar.button("Process URLs"):
60
- rockybot.process_urls(st.sidebar.text_input("URL 1"), st.sidebar.text_input("URL 2"), st.sidebar.text_input("URL 3"))
61
  st.progress(100.0)
62
 
63
- # Answer the question if it is not empty
64
- query = st.text_input("Question: ")
65
- if query:
66
- answer, sources = rockybot.answer_question(query)
67
-
68
- st.header("Answer")
69
- st.write(answer)
70
-
71
- # Display sources, if available
72
- if sources:
73
- st.subheader("Sources:")
74
- for source in sources.split("\n"):
75
- st.write(source)
76
-
 
 
 
 
 
 
 
6
  from langchain.chains import RetrievalQAWithSourcesChain
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.document_loaders import UnstructuredURLLoader
9
+ # from langchain.embeddings import OpenAIEmbeddings
10
  from langchain.embeddings import FakeEmbeddings
11
  from langchain.llms import HuggingFaceHub
12
  from langchain.chains import LLMChain
 
14
 
15
  from dotenv import load_dotenv
16
  load_dotenv() # take environment variables from .env (especially openai api key)
 
17
 
18
+ st.title("RockyBot: News Research Tool 📈")
19
+ st.sidebar.title("News Article URLs")
20
 
21
+ urls = []
22
+ for i in range(3):
23
+ url = st.sidebar.text_input(f"URL {i+1}")
24
+ urls.append(url)
25
 
26
+ process_url_clicked = st.sidebar.button("Process URLs")
27
+ file_path = "faiss_store_openai.pkl"
28
 
29
+ main_placeholder = st.empty()
30
+ llm = HuggingFaceHub( repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 64} )
31
 
32
+ @st.cache
33
+ def process_urls(urls):
34
+ """Processes the given URLs and saves the FAISS index to a pickle file."""
 
 
 
35
 
36
+ # load data
37
+ loader = UnstructuredURLLoader(urls=urls)
 
38
 
39
+ # split data
40
+ text_splitter = RecursiveCharacterTextSplitter(
41
+ separators=['\n\n', '\n', '.', ','],
42
+ chunk_size=1000
43
+ )
44
+ docs = text_splitter.split_documents(loader.load())
45
 
46
+ # create embeddings and save it to FAISS index
47
+ embeddings = FakeEmbeddings(size=1352)
48
+ vectorstore_openai = FAISS.from_documents(docs, embeddings)
49
 
50
+ # Save the FAISS index to a pickle file
51
+ with open(file_path, "wb") as f:
52
+ pickle.dump(vectorstore_openai, f)
53
 
54
+ if process_url_clicked:
55
+ with st.progress(0.0):
56
+ process_urls(urls)
 
 
 
 
 
 
 
57
  st.progress(100.0)
58
 
59
+ query = main_placeholder.text_input("Question: ")
60
+ if query:
61
+ try:
62
+ with open(file_path, "rb") as f:
63
+ vectorstore = pickle.load(f)
64
+ chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
65
+ result = chain({"question": query}, return_only_outputs=True)
66
+ # result will be a dictionary of this format --> {"answer": "", "sources": [] }
67
+ st.header("Answer")
68
+ st.write(result["answer"])
69
+
70
+ # Display sources, if available
71
+ sources = result.get("sources", "")
72
+ if sources:
73
+ st.subheader("Sources:")
74
+ sources_list = sources.split("\n") # Split the sources by newline
75
+ for source in sources_list:
76
+ st.write(source)
77
+ except Exception as e:
78
+ st.error(e)