AdityaTheDev committed · verified
Commit b21273b · Parent(s): f2a7414

Update app.py

Files changed (1):
  1. app.py +105 -104
app.py CHANGED
@@ -1,104 +1,105 @@
- import os
- import streamlit as st
- import pickle
- import time
- from langchain.chains import RetrievalQA
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.document_loaders import UnstructuredURLLoader
- #from langchain.vectorstores import FAISS
- from langchain_community.vectorstores import FAISS
- from langchain_huggingface import HuggingFaceEndpoint
- from sentence_transformers import SentenceTransformer
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain import HuggingFaceHub
- from dotenv import load_dotenv
-
- load_dotenv()
- repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
- llm = HuggingFaceHub(
-     repo_id=repo_id,
-     task="text-generation",
-     huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
-     model_kwargs={"temperature": 0.6,
-                   "max_tokens": 1000}
- )
-
- st.title("Article Research Tool 🔎")
- st.sidebar.title("Article URLs")
-
- # Initialize session state to store the number of URL inputs
- if 'url_count' not in st.session_state:
-     st.session_state.url_count = 1  # Start with a single URL input
-
- # Function to add a new URL input
- def add_url():
-     st.session_state.url_count += 1
-
- # List to store the URLs
- urls = []
-
- # Create the URL input fields dynamically
- for i in range(st.session_state.url_count):
-     url = st.sidebar.text_input(f"URL {i+1}")
-     urls.append(url)
-
- # Add a button to increase the number of URLs
- st.sidebar.button("Add another URL", on_click=add_url)
- process_url_clicked = st.sidebar.button("Submit URLs")
-
- # urls = []
- # for i in range(3):
- #     url = st.sidebar.text_input(f"URL {i+1}")
- #     urls.append(url)
- # process_url_clicked = st.sidebar.button("Process URLs")
-
- file_path = "faiss_store_db.pkl"
- placeholder = st.empty()
-
- if process_url_clicked:
-     # Loading the data
-     loader = UnstructuredURLLoader(urls=urls)
-     placeholder.text("Data Loading started...")
-     data = loader.load()
-     # Splitting the data
-     text_splitter = RecursiveCharacterTextSplitter(
-         separators=['\n\n', '\n', '.'],
-         chunk_size=600
-     )
-     placeholder.text("Splitting of Data Started...")
-     docs = text_splitter.split_documents(data)
-     # Creating embeddings
-     model_name = "sentence-transformers/all-mpnet-base-v2"  # or "sentence-transformers/all-MiniLM-L6-v2"
-     hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
-     vector_index = FAISS.from_documents(docs, hf_embeddings)
-     placeholder.text("Started Building Embedded Vector...")
-     # Saving in FAISS store
-     with open(file_path, 'wb') as f:
-         pickle.dump(vector_index, f)
-
- query = placeholder.text_input("Question :")
- submit = st.button("Submit")
- if query:
-     if os.path.exists(file_path):
-         with open(file_path, 'rb') as f:
-             vector_index = pickle.load(f)
-         retrieval_qa = RetrievalQA.from_chain_type(
-             llm=llm,
-             chain_type="stuff",  # 'stuff', 'map_reduce', or 'refine' depending on your use case
-             retriever=vector_index.as_retriever()
-         )
-         result = retrieval_qa({'query': query})
-         text = result['result']
-
-         start_index = text.find("\nHelpful Answer:")
-
-         # Extract everything after "\nHelpful Answer:" if it exists,
-         # otherwise fall back to the full response so parsed_text is always defined
-         parsed_text = text[start_index + len("\nHelpful Answer:"):] if start_index != -1 else text
-         parsed_text = parsed_text.strip()  # strip any extra whitespace
-         if query or submit:
-             st.header("Answer :")
-             st.write(parsed_text)
 
 
+ import os
+ import streamlit as st
+ import pickle
+ import time
+ from langchain.chains import RetrievalQA
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredURLLoader
+ #from langchain.vectorstores import FAISS
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEndpoint
+ from sentence_transformers import SentenceTransformer
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain import HuggingFaceHub
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
+ llm = HuggingFaceHub(
+     repo_id=repo_id,
+     task="text-generation",
+     huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
+     model_kwargs={"temperature": 0.6,
+                   "max_tokens": 1000}
+ )
+
+ st.title("Article Research Tool 🔎")
+ st.sidebar.title("Article URLs")
+
+ # Initialize session state to store the number of URL inputs
+ if 'url_count' not in st.session_state:
+     st.session_state.url_count = 1  # Start with a single URL input
+
+ # Function to add a new URL input
+ def add_url():
+     st.session_state.url_count += 1
+
+ # List to store the URLs
+ urls = []
+
+ # Create the URL input fields dynamically
+ for i in range(st.session_state.url_count):
+     url = st.sidebar.text_input(f"URL {i+1}")
+     urls.append(url)
+
+ # Add a button to increase the number of URLs
+ st.sidebar.button("Add another URL", on_click=add_url)
+ process_url_clicked = st.sidebar.button("Submit URLs")
+
+ # urls = []
+ # for i in range(3):
+ #     url = st.sidebar.text_input(f"URL {i+1}")
+ #     urls.append(url)
+ # process_url_clicked = st.sidebar.button("Process URLs")
+
+ file_path = "faiss_store_db.pkl"
+ placeholder = st.empty()
+
+ if process_url_clicked:
+     # Loading the data
+     loader = UnstructuredURLLoader(urls=urls)
+     placeholder.text("Data Loading started...")
+     data = loader.load()
+     # Splitting the data
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=['\n\n', '\n', '.'],
+         chunk_size=600,
+         chunk_overlap=100
+     )
+     placeholder.text("Splitting of Data Started...")
+     docs = text_splitter.split_documents(data)
+     # Creating embeddings
+     model_name = "sentence-transformers/all-mpnet-base-v2"  # or "sentence-transformers/all-MiniLM-L6-v2"
+     hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
+     vector_index = FAISS.from_documents(docs, hf_embeddings)
+     placeholder.text("Started Building Embedded Vector...")
+     # Saving in FAISS store
+     with open(file_path, 'wb') as f:
+         pickle.dump(vector_index, f)
+
+ query = placeholder.text_input("Question :")
+ submit = st.button("Submit")
+ if query:
+     if os.path.exists(file_path):
+         with open(file_path, 'rb') as f:
+             vector_index = pickle.load(f)
+         retrieval_qa = RetrievalQA.from_chain_type(
+             llm=llm,
+             chain_type="stuff",  # 'stuff', 'map_reduce', or 'refine' depending on your use case
+             retriever=vector_index.as_retriever()
+         )
+         result = retrieval_qa({'query': query})
+         text = result['result']
+
+         start_index = text.find("\nHelpful Answer:")
+
+         # Extract everything after "\nHelpful Answer:" if it exists,
+         # otherwise fall back to the full response so parsed_text is always defined
+         parsed_text = text[start_index + len("\nHelpful Answer:"):] if start_index != -1 else text
+         parsed_text = parsed_text.strip()  # strip any extra whitespace
+         if query or submit:
+             st.header("Answer :")
+             st.write(parsed_text)
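
The functional change in this commit is the new chunk_overlap argument to the text splitter. The sketch below is illustrative only: it assumes the same RecursiveCharacterTextSplitter import used in app.py, and the sample string and small sizes are made up so the effect fits on screen. With overlap, each chunk repeats the tail of the previous one, so text that falls near a chunk boundary can still be matched by the retriever.

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Illustrative sample and sizes (much smaller than the real
    # chunk_size=600 / chunk_overlap=100 used in app.py).
    sample = (
        "Markets rallied on Monday. Tech stocks led the gains. "
        "Analysts credited strong earnings reports. Bond yields eased slightly."
    )

    # Without overlap, context at chunk boundaries is lost.
    no_overlap = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.'], chunk_size=60, chunk_overlap=0
    )

    # With overlap, each chunk repeats the tail of the previous one.
    with_overlap = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.'], chunk_size=60, chunk_overlap=20
    )

    print(no_overlap.split_text(sample))
    print(with_overlap.split_text(sample))

With the values in app.py (chunk_size=600, chunk_overlap=100), roughly a sixth of each chunk is shared with its neighbor, which costs some extra embeddings but reduces the chance that an answer is split across two chunks.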