Maxjohn12 committed on
Commit
e14b711
·
verified ·
1 Parent(s): 5f63353

Upload 5 files

Files changed (6)
  1. .env +1 -0
  2. .gitattributes +1 -0
  3. FinSight.jpg +3 -0
  4. README.md +17 -11
  5. main.py +72 -0
  6. requirements.txt +10 -0
.env ADDED
@@ -0,0 +1 @@
+ OPENAI_API_KEY='enter your OpenAI API key here'
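
For reference, a minimal sketch of how this key reaches the app at runtime. It mirrors the load_dotenv() call in main.py below; the assert is an illustrative addition, not part of the commit:

```python
import os
from dotenv import load_dotenv

# Copy key=value pairs from .env into the process environment.
load_dotenv()

# LangChain's OpenAI wrappers read OPENAI_API_KEY from the environment,
# so no explicit key argument is needed once .env is loaded.
api_key = os.getenv("OPENAI_API_KEY")
assert api_key, "OPENAI_API_KEY is missing; check your .env file"
```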
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ FinSight.jpg filter=lfs diff=lfs merge=lfs -text
FinSight.jpg ADDED

Git LFS Details

  • SHA256: fa9cec78793a10b03f8235d0252908bf36df0b5debab73a2c5df4f203f98fcbf
  • Pointer size: 131 Bytes
  • Size of remote file: 117 kB
README.md CHANGED
@@ -1,11 +1,17 @@
- ---
- title: FinSight
- emoji: 🏃
- colorFrom: pink
- colorTo: pink
- sdk: docker
- pinned: false
- short_description: FinSight Finance News Research Tool
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # FinSight - Finance News Research Tool
+ FinSight is a user-friendly news research tool designed for effortless information retrieval. Users can input article URLs and ask questions to receive relevant insights from the stock market and financial domain.
+
+ ![](FinSight.jpg)
+
+ ## Features
+
+ - Load URLs or upload text files containing URLs to fetch article content (see the uploader sketch after this diff).
+ - Process article content with LangChain's UnstructuredURLLoader.
+ - Build an embedding vector store with OpenAI embeddings and FAISS, a similarity search library, for swift and effective retrieval of relevant information.
+ - Query the LLM (ChatGPT) and receive answers along with source URLs.
+
+ ## Project Structure
+
+ - main.py: The main Streamlit application script.
+ - requirements.txt: A list of required Python packages for the project.
+ - .env: Configuration file for storing your OpenAI API key.
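
The first Features bullet mentions uploading text files of URLs, which the committed main.py below does not yet wire up. A minimal sketch of how that could look with Streamlit's file_uploader (hypothetical, not part of this commit):

```python
import streamlit as st

# Hypothetical addition: accept a .txt file with one article URL per line.
uploaded_file = st.sidebar.file_uploader("Upload a .txt file of URLs", type=["txt"])

urls = []
if uploaded_file is not None:
    # UploadedFile exposes a binary read(); decode and keep non-empty lines.
    text = uploaded_file.read().decode("utf-8")
    urls = [line.strip() for line in text.splitlines() if line.strip()]
```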
main.py ADDED
@@ -0,0 +1,72 @@
+ import os
+ import streamlit as st
+ import pickle
+ import time
+ from langchain import OpenAI
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredURLLoader
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+
+ from dotenv import load_dotenv
+ load_dotenv()  # take environment variables from .env (especially the OpenAI API key)
+
+ st.title("FinSight: News Research Tool 📈")
+ st.sidebar.title("News Article URLs")
+
+ # Collect up to three article URLs from the sidebar, skipping empty inputs
+ urls = []
+ for i in range(3):
+     url = st.sidebar.text_input(f"URL {i+1}")
+     if url:
+         urls.append(url)
+
+ process_url_clicked = st.sidebar.button("Process URLs")
+ file_path = "faiss_store_openai.pkl"
+
+ main_placeholder = st.empty()
+ llm = OpenAI(temperature=0.9, max_tokens=500)
+
+ if process_url_clicked:
+     # load data from the given URLs
+     loader = UnstructuredURLLoader(urls=urls)
+     main_placeholder.text("Data Loading...Started...✅✅✅")
+     data = loader.load()
+     # split data into ~1000-character chunks, preferring paragraph and sentence boundaries
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=['\n\n', '\n', '.', ','],
+         chunk_size=1000
+     )
+     main_placeholder.text("Text Splitter...Started...✅✅✅")
+     docs = text_splitter.split_documents(data)
+     # create embeddings and index them with FAISS
+     embeddings = OpenAIEmbeddings()
+     vectorstore_openai = FAISS.from_documents(docs, embeddings)
+     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
+     time.sleep(2)
+
+     # Save the FAISS index to a pickle file
+     with open(file_path, "wb") as f:
+         pickle.dump(vectorstore_openai, f)
+
+ query = main_placeholder.text_input("Question: ")
+ if query:
+     if os.path.exists(file_path):
+         with open(file_path, "rb") as f:
+             vectorstore = pickle.load(f)
+         chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
+         result = chain({"question": query}, return_only_outputs=True)
+         # result is a dictionary of the form {"answer": "...", "sources": "..."}
+         st.header("Answer")
+         st.write(result["answer"])
+
+         # Display sources, if available
+         sources = result.get("sources", "")
+         if sources:
+             st.subheader("Sources:")
+             sources_list = sources.split("\n")  # Split the sources by newline
+             for source in sources_list:
+                 st.write(source)
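
Pickling the whole vectorstore works with the pinned langchain 0.0.284, but it ties faiss_store_openai.pkl to library internals that change across releases. An alternative sketch using the FAISS vectorstore's own save_local/load_local persistence (INDEX_DIR is a hypothetical name, not the .pkl path used above):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

INDEX_DIR = "faiss_store_openai_index"  # hypothetical folder, not the .pkl above

def save_index(vectorstore: FAISS) -> None:
    # Writes the raw FAISS index plus the docstore into INDEX_DIR.
    vectorstore.save_local(INDEX_DIR)

def load_index() -> FAISS:
    # The embeddings object must match the one used to build the index.
    return FAISS.load_local(INDEX_DIR, OpenAIEmbeddings())
```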
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ langchain==0.0.284
+ python-dotenv==1.0.0
+ streamlit==1.22.0
+ unstructured==0.9.2
+ tiktoken==0.4.0
+ faiss-cpu==1.7.4
+ libmagic==1.0
+ python-magic==0.4.27
+ python-magic-bin==0.4.14
+ openai==0.28.0