Kabirsingla committed on
Commit
cfe33c7
·
verified ·
1 Parent(s): e3c34dd

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +76 -0
  2. requirements.txt +15 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit research tool: answer a question from up to three article URLs.

On each submitted question the script downloads the articles entered in the
sidebar, splits them into chunks, embeds the chunks with OpenAI, stores them
in a Pinecone index, retrieves the chunks most similar to the question and
asks an OpenAI LLM to answer from those chunks.

Reads OPENAI_API_KEY and PINECONE_API_KEY from the environment (a local
``.env`` file is loaded at start-up).
"""
import os
import pickle
import time

import pinecone
import streamlit as st
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.vectorstores import Pinecone

load_dotenv()  # take environment variables from .env (especially openai api key)

st.title("Research Tool 📈")
st.sidebar.title("Article URLs")

# One sidebar text box per article; boxes left empty yield "" entries.
urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

main_placeholder = st.empty()

query = main_placeholder.text_input("Question: ")

# Only hand real URLs to the loader: blank sidebar fields would otherwise be
# fetched as empty addresses and fail one by one.
article_urls = [u.strip() for u in urls if u and u.strip()]

if query and not article_urls:
    st.warning("Please enter at least one article URL in the sidebar.")
elif query:
    loader = UnstructuredURLLoader(urls=article_urls)
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split the articles into chunks, trying the coarsest separator first.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    # Create embeddings and store them in a Pinecone index.
    embeddings = OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY'))

    pinecone.init(
        api_key=os.getenv('PINECONE_API_KEY'),
        environment="gcp-starter"
    )
    index_name = "langchainvector"
    # Announce the step before it runs; the upload below is the slow part.
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    # NOTE(review): this runs for every question, so the same chunks are
    # presumably re-uploaded to the index each time - confirm and consider
    # caching the index per URL set.
    index = Pinecone.from_documents(docs, embeddings, index_name=index_name)

    def retrieve_query(mquery, k=3):
        """Return the ``k`` stored chunks most similar to ``mquery``."""
        matching_results = index.similarity_search(mquery, k=k)
        return matching_results

    llm = OpenAI(temperature=0.5)
    chain = load_qa_chain(llm, chain_type="stuff")

    def retrieve_ans(mquery):
        """Answer ``mquery`` using the chunks retrieved for it."""
        doc_search = retrieve_query(mquery)
        print(doc_search)
        # Ask the chain the same question that was used for retrieval
        # (previously the global ``query`` was passed here instead).
        response = chain.run(input_documents=doc_search, question=mquery)
        return response

    result = retrieve_ans(query)
    st.header("Answer")
    st.write(result)
    # Display sources, if available
    # sources = result.get("sources", "")
    # if sources:
    #     st.subheader("Sources:")
    #     sources_list = sources.split("\n")  # Split the sources by newline
    #     for source in sources_list:
    #         st.write(source)
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ python-dotenv
3
+ streamlit
4
+ unstructured
5
+ tiktoken
6
+ libmagic
7
+ python-magic
8
+ python-magic-bin
9
+ OpenAI
10
+ pandas
11
+ numpy
12
+ scipy
13
+ pinecone-client
14
+ scikit-learn
15
+ matplotlib