import os
import time

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.url import UnstructuredURLLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
from langchain_together import ChatTogether

# Load environment variables from .env
load_dotenv()

# Set Streamlit app title
st.title("News Research Tool 📈")
st.sidebar.title("News Article URLs")

# Get URLs from user input
urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

# Button to process URLs
process_url_clicked = st.sidebar.button("Process URLs")
faiss_index_path = "faiss_index"

# Placeholder for main content
main_placeholder = st.empty()

# Initialize the Together-hosted LLM
llm = ChatTogether(
    model="meta-llama/Llama-Vision-Free",
    api_key=os.getenv("TOGETHER_API_KEY"),
)


def save_faiss_index(vectorstore, path):
    # Persist the FAISS index (and its document store) to disk
    vectorstore.save_local(path)


def load_faiss_index(path, embeddings):
    # Load the FAISS index from disk and recreate the vectorstore
    return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)


if process_url_clicked:
    # Load data from the non-empty URLs
    loader = UnstructuredURLLoader(urls=[u for u in urls if u])
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split data into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    # Create embeddings and build the FAISS index
    embeddings = HuggingFaceEndpointEmbeddings(
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )
    vectorstore = FAISS.from_documents(docs, embeddings)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)

    # Save the FAISS index to disk
    save_faiss_index(vectorstore, faiss_index_path)

# Get query from user input
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(faiss_index_path):
        # Recreate the same embedding model used at index time, then load the index
        embeddings = HuggingFaceEndpointEmbeddings(
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
        vectorstore = load_faiss_index(faiss_index_path, embeddings)

        # Build a retrieval QA chain that also returns source URLs
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain({"question": query}, return_only_outputs=True)

        # Display the answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            sources_list = sources.split("\n")
            for source in sources_list:
                st.write(source)
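
# A minimal sketch of the expected environment setup, assuming the variable
# names used above. TOGETHER_API_KEY is this script's assumed name for the
# Together API key; HUGGINGFACEHUB_API_TOKEN is read for the Hugging Face
# endpoint embeddings. Adjust both to whatever your .env actually defines.
#
#   # .env
#   TOGETHER_API_KEY=<your Together API key>
#   HUGGINGFACEHUB_API_TOKEN=<your Hugging Face token>
#
# Launch the app with Streamlit (substitute this file's actual name):
#   streamlit run app.py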