Spaces:
Sleeping
Sleeping
import os | |
import streamlit as st | |
import pickle | |
import time | |
import langchain | |
from langchain import OpenAI | |
from langchain.chains import RetrievalQAWithSourcesChain | |
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.document_loaders import UnstructuredURLLoader | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.vectorstores import FAISS | |
from dotenv import load_dotenv | |
load_dotenv() | |
# print(os.getenv("OPENAI_API_KEY")) | |
st.title("News Research TOOL") | |
st.sidebar.title("News Article URLs") | |
urls = [] | |
for i in range(3): | |
url = st.sidebar.text_input(f"URL {i+1}") | |
urls.append(url) | |
main_placefolder = st.empty() | |
llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"),temperature=0.9,max_tokens=500) | |
process_url_clicked = st.sidebar.button("Process URLs") | |
if process_url_clicked: | |
loader = UnstructuredURLLoader(urls=urls) | |
main_placefolder.text("Data loading....Started...β β β β β ") | |
data = loader.load() | |
text_splitter = RecursiveCharacterTextSplitter( | |
separators=['\n\n','\n','.',','], | |
chunk_size=1000 | |
) | |
main_placefolder.text("Text Splitting....Started...β β β β β ") | |
docs = text_splitter.split_documents(data) | |
#embbedings | |
embeddings = OpenAIEmbeddings() | |
vectorstore_openai = FAISS.from_documents(docs,embedding=embeddings) | |
main_placefolder.text("Embbeding Vectors....Started...β β β β β ") | |
time.sleep(2) | |
vectorstore_openai.save_local("vectorindex_openai") | |
query = main_placefolder.text_input("Question: ") | |
if query: | |
if os.path.exists('vectorindex_openai'): | |
embeddings = OpenAIEmbeddings() | |
vectorindex = FAISS.load_local('vectorindex_openai', embeddings=embeddings,allow_dangerous_deserialization=True) | |
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm,retriever=vectorindex.as_retriever()) | |
result = chain({"question": query},return_only_outputs=True) | |
st.header("Answer") | |
st.write(result['answer']) | |
#Display Source if available | |
sources = result.get("sources","") | |
if sources: | |
st.subheader("Sources:") | |
source_list = sources.split("\n") | |
for source in source_list: | |
st.write(source) | |