pyschopoodle committed on
Commit d7ae972 · verified · 1 Parent(s): fd84789

Upload 2 files

Files changed (2)
  1. main.py +71 -0
  2. requirements.txt +11 -0
main.py ADDED
@@ -0,0 +1,71 @@
+ import os
+ import streamlit as st
+ import pickle
+ import time
+ import langchain
+ from langchain import OpenAI
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredURLLoader
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+
+ from dotenv import load_dotenv
+ load_dotenv()  # loads OPENAI_API_KEY from a local .env file
+ # print(os.getenv("OPENAI_API_KEY"))
+
+ st.title("News Research Tool")
+ st.sidebar.title("News Article URLs")
+
+ # Collect up to three article URLs from the sidebar, skipping blank inputs
+ urls = []
+ for i in range(3):
+     url = st.sidebar.text_input(f"URL {i+1}")
+     if url:
+         urls.append(url)
+
+ main_placeholder = st.empty()
+ llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), temperature=0.9, max_tokens=500)
+
+ process_url_clicked = st.sidebar.button("Process URLs")
+ if process_url_clicked:
+     # Load the articles, split them into chunks, embed them, and save a FAISS index locally
+     loader = UnstructuredURLLoader(urls=urls)
+     main_placeholder.text("Data loading....Started...✅✅✅✅✅")
+     data = loader.load()
+
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=['\n\n', '\n', '.', ','],
+         chunk_size=1000
+     )
+     main_placeholder.text("Text Splitting....Started...✅✅✅✅✅")
+     docs = text_splitter.split_documents(data)
+
+     # Embeddings
+     embeddings = OpenAIEmbeddings()
+     vectorstore_openai = FAISS.from_documents(docs, embedding=embeddings)
+     main_placeholder.text("Embedding Vectors....Started...✅✅✅✅✅")
+     time.sleep(2)
+     vectorstore_openai.save_local("vectorindex_openai")
+
+ query = main_placeholder.text_input("Question: ")
+ if query:
+     if os.path.exists('vectorindex_openai'):
+         embeddings = OpenAIEmbeddings()
+         # Newer langchain releases require allow_dangerous_deserialization=True to load the pickled index metadata
+         vectorindex = FAISS.load_local('vectorindex_openai', embeddings=embeddings, allow_dangerous_deserialization=True)
+         chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorindex.as_retriever())
+         result = chain({"question": query}, return_only_outputs=True)
+
+         st.header("Answer")
+         st.write(result['answer'])
+
+         # Display sources, if available
+         sources = result.get("sources", "")
+         if sources:
+             st.subheader("Sources:")
+             source_list = sources.split("\n")
+             for source in source_list:
+                 st.write(source)
+
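For reference, the saved index can also be queried outside the Streamlit UI. This is a minimal sketch, assuming the app above has already written the vectorindex_openai folder and that OPENAI_API_KEY is set in the environment; the file name query_index.py and the sample question are illustrative and not part of this commit.

# query_index.py - hypothetical smoke test for the FAISS index saved by main.py
import os
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

load_dotenv()  # expects OPENAI_API_KEY in a local .env file

# Load the index that main.py saved with save_local("vectorindex_openai").
# Note: newer langchain releases also require allow_dangerous_deserialization=True here.
embeddings = OpenAIEmbeddings()
vectorindex = FAISS.load_local("vectorindex_openai", embeddings)

llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), temperature=0.9, max_tokens=500)
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorindex.as_retriever())

result = chain({"question": "What is the article about?"}, return_only_outputs=True)
print(result["answer"])
print(result.get("sources", ""))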
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ langchain==0.0.284
+ python-dotenv==1.0.0
+ streamlit==1.22.0
+ unstructured==0.9.2
+ tiktoken==0.4.0
+ faiss-cpu==1.7.4
+ libmagic==1.0
+ python-magic==0.4.27
+ python-magic-bin==0.4.14
+ OpenAI
+ langchain_community