Samagra07's picture
Upload 2 files
3618625 verified
raw
history blame
3.2 kB
import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders.url import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import time
from langchain_together import ChatTogether
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
# Load environment variables (API keys) from a local .env file, if present.
load_dotenv()

# Page title and sidebar heading.
st.title("News Research Tool 📈")
st.sidebar.title("News Article URLs")

# Collect up to three article URLs from the sidebar. Fields the user leaves
# blank come back as empty strings and are kept here as-is.
urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(3)]

# Button that triggers (re)building the FAISS index from the URLs above.
process_url_clicked = st.sidebar.button("Process URLs")

# Directory where the FAISS index is persisted between Streamlit reruns.
faiss_index_path = "faiss_index"

# Single-slot placeholder in the main area: used first for progress messages
# and later for the question input box.
main_placeholder = st.empty()

# Initialize the chat model served through Together AI.
# ChatTogether is a Together client, so prefer TOGETHER_API_KEY; the
# OPENAI_API_KEY variable this script originally read is kept as a fallback so
# existing .env files keep working.
llm = ChatTogether(
    model="meta-llama/Llama-Vision-Free",
    api_key=os.getenv("TOGETHER_API_KEY") or os.getenv("OPENAI_API_KEY"),
)
def save_faiss_index(vectorstore, path) -> None:
    """Persist *vectorstore* under the directory *path*.

    The work is delegated to the vector store's own ``save_local`` method;
    this function returns nothing.
    """
    vectorstore.save_local(path)
def load_faiss_index(path, embeddings):
    """Rebuild a FAISS vector store from the files saved under *path*.

    *embeddings* is handed to ``FAISS.load_local`` together with the path, and
    the loaded store is returned.
    """
    # NOTE(review): this flag opts in to deserialization that LangChain labels
    # "dangerous"; only point `path` at an index this app wrote itself.
    restored_store = FAISS.load_local(
        path,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    return restored_store
if process_url_clicked:
    # Blank sidebar fields arrive as empty strings; drop them so the loader
    # is only asked to fetch real URLs.
    article_urls = [url.strip() for url in urls if url and url.strip()]

    # Load data from URLs
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = UnstructuredURLLoader(urls=article_urls).load() if article_urls else []

    # Split data into chunks, trying paragraph, line, sentence and clause
    # boundaries in that order.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    if not docs:
        # Building a FAISS index from zero documents raises, so stop here and
        # tell the user instead of crashing the app.
        st.warning(
            "No article text could be loaded. "
            "Enter at least one valid URL and try again."
        )
    else:
        # Show the status before the slow embedding step so it is visible
        # while the index is being built.
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")

        # Create embeddings and build the FAISS index from the chunks
        embeddings = HuggingFaceEndpointEmbeddings(
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
        article_index = FAISS.from_documents(docs, embeddings)

        # Save the FAISS index to disk so later reruns can answer questions
        save_faiss_index(article_index, faiss_index_path)
# Get query from user input (this replaces any progress message that was
# shown in the placeholder).
query = main_placeholder.text_input("Question: ")
if query:
    if not os.path.exists(faiss_index_path):
        # Nothing has been indexed yet; say so rather than silently ignoring
        # the question.
        st.warning(
            "No processed articles found. Add URLs in the sidebar and click "
            "'Process URLs' first."
        )
    else:
        # Use the same embedding class that built the index so query vectors
        # are produced the same way as the stored document vectors.
        embeddings = HuggingFaceEndpointEmbeddings(
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
        vectorstore = load_faiss_index(faiss_index_path, embeddings)
        chain = RetrievalQAWithSourcesChain.from_llm(
            llm=llm, retriever=vectorstore.as_retriever()
        )
        # `invoke` replaces the deprecated direct call `chain(...)`.
        result = chain.invoke({"question": query}, return_only_outputs=True)

        # Display the answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available (one per line, skipping blank lines)
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                if source.strip():
                    st.write(source)