AdityaTheDev committed on
Commit
6bb421c
·
verified ·
1 Parent(s): 012c9d4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pickle
4
+ import time
5
+ from langchain.chains import RetrievalQA
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.document_loaders import UnstructuredURLLoader
8
+ #from langchain.vectorstores import FAISS
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_huggingface import HuggingFaceEndpoint
11
+ from sentence_transformers import SentenceTransformer
12
+ from langchain.embeddings import HuggingFaceEmbeddings
13
+ from langchain import HuggingFaceHub
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.3"#"mistralai/Mistral-7B-Instruct-v0.3"
18
+ llm = HuggingFaceHub(
19
+ repo_id=repo_id,
20
+ task="text-generation",
21
+ huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
22
+ model_kwargs={"temperature": 0.6,
23
+ "max_tokens":1000}
24
+ )
25
+
26
+ st.title("Article Research Tool 🔎")
27
+ st.sidebar.title("Article URLs")
28
+
29
+ # Initialize session state to store the number of URL inputs
30
+ if 'url_count' not in st.session_state:
31
+ st.session_state.url_count = 1 # Start with 3 URL placeholders
32
+
33
+ # Function to add a new URL input
34
+ def add_url():
35
+ st.session_state.url_count += 1
36
+
37
+
38
+ # List to store the URLs
39
+ urls = []
40
+
41
+ # Create the URL input fields dynamically
42
+ for i in range(st.session_state.url_count):
43
+ url = st.sidebar.text_input(f"URL {i+1}")
44
+ urls.append(url)
45
+ # Add a button to increase the number of URLs
46
+ st.sidebar.button("Add another URL", on_click=add_url)
47
+ process_url_clicked=st.sidebar.button("Submit URLs")
48
+
49
+
50
+ # urls=[]
51
+ # for i in range(3):
52
+ # url=st.sidebar.text_input(f"URL {i+1}")
53
+ # urls.append(url)
54
+ # process_url_clicked=st.sidebar.button("Process URLs")
55
+
56
+
57
+ file_path="faiss_store_db.pkl"
58
+ placeholder=st.empty()
59
+
60
+ if process_url_clicked:
61
+ #Loading the data
62
+ loader=UnstructuredURLLoader(urls=urls)
63
+ placeholder.text("Data Loading started...")
64
+ data=loader.load()
65
+ #Splitting the data
66
+ text_splitter=RecursiveCharacterTextSplitter(
67
+ separators=['\n\n','\n','.','.'],
68
+ chunk_size=600
69
+ )
70
+ placeholder.text("Splitting of Data Started...")
71
+ docs=text_splitter.split_documents(data)
72
+ #creating embeddings
73
+ model_name = "sentence-transformers/all-mpnet-base-v2" #"sentence-transformers/all-MiniLM-L6-v2"
74
+ hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
75
+ vector_index=FAISS.from_documents(docs,hf_embeddings)
76
+ placeholder.text("Started Building Embedded Vector...")
77
+ #saving in FAISS store
78
+ with open(file_path,'wb') as f:
79
+ pickle.dump(vector_index,f)
80
+
81
+ query=placeholder.text_input("Question :")
82
+ submit=st.button("Submit")
83
+ if query:
84
+ if os.path.exists(file_path):
85
+ with open(file_path,'rb') as f:
86
+ vector_index=pickle.load(f)
87
+ retrieval_qa = RetrievalQA.from_chain_type(
88
+ llm=llm,
89
+ chain_type="stuff", # You can use 'stuff', 'map_reduce', or 'refine' depending on your use case
90
+ retriever=vector_index.as_retriever()
91
+ )
92
+ result=retrieval_qa({'query':query})
93
+ text=result['result']
94
+
95
+ start_index = text.find("\nHelpful Answer:")
96
+
97
+ # Extract everything after "\nHelpful Answer:" if it exists
98
+ if start_index != -1:
99
+ parsed_text =text[start_index + len("\nHelpful Answer:"):]
100
+ parsed_text = parsed_text.strip() # Optionally strip any extra whitespace
101
+ if query or submit:
102
+ st.header("Answer :")
103
+ st.write(parsed_text)
104
+