import os
import subprocess
import sys
import asyncio

import streamlit as st
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

from scraper.scraper import process_urls
from embedding.vector_store import initialize_vector_store

# Install Playwright browser binaries; cached so Streamlit reruns don't
# re-trigger the download on every interaction.
@st.cache_resource
def install_playwright():
    subprocess.run(["playwright", "install"], check=True)

install_playwright()
# Groq API key: read from the environment (e.g. a Space/host secret) instead
# of hardcoding the secret in source.
groq_api = os.environ["GROQ_API_KEY"]

# Initialize the LLM; temperature=0 keeps answers deterministic. Memory is
# handled via the {history} prompt variable, not by the model itself.
llm = ChatGroq(model="llama-3.2-1b-preview", groq_api_key=groq_api, temperature=0)
# ✅ System prompt with history
system_prompt = """
You are an AI assistant capable of answering user queries.
- **For factual questions**, use **only** the retrieved context below. If the answer is not in the context, say **"I don't know."**
- **For general conversational inputs** (e.g., greetings, small talk), respond naturally.
- **Maintain memory of previous questions and responses to ensure context-aware answers.**
- **Do not make assumptions or generate false information.**
**Chat History:**
{history}
**Context:**
{context}
Now, answer concisely.
"""
# Chat prompt: system instructions plus the user's question
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# Windows needs the Proactor event loop for subprocess support, which
# Playwright's async API relies on.
if sys.platform.startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
# Helper to run an async coroutine from Streamlit's synchronous script
def run_asyncio_coroutine(coro):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()  # don't leak event loops across reruns
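# Example usage (hypothetical URL), mirroring the scraper call below:
#   split_docs = run_asyncio_coroutine(process_urls(["https://example.com"]))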
# Streamlit UI
st.title("Chat with Scraped Data 🤖")

# URL inputs; the button stays disabled until something is entered
urls = st.text_area("Enter URLs (one per line)")
run_scraper = st.button("Run Scraper", disabled=not urls.strip())
# Session state
if "messages" not in st.session_state:
    st.session_state.messages = []  # chat transcript shown in the UI
if "history" not in st.session_state:
    st.session_state.history = ""  # past Q&A fed back into the prompt
if "scraping_done" not in st.session_state:
    st.session_state.scraping_done = False
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
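# (Streamlit reruns this script top-to-bottom on every interaction, so any
# state that must survive a rerun lives in st.session_state.)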
# Run the scraper and build the vector store
if run_scraper:
    st.write("Fetching and processing URLs... This may take a while.")
    # Drop blank lines so stray newlines don't become empty URLs
    url_list = [u.strip() for u in urls.splitlines() if u.strip()]
    split_docs = run_asyncio_coroutine(process_urls(url_list))
    st.session_state.vector_store = initialize_vector_store(split_docs)
    st.session_state.scraping_done = True
    st.success("Scraping and processing completed!")
# Enable chat only after scraping has produced a vector store
if not st.session_state.scraping_done:
    st.warning("Scrape some data first to enable chat!")
else:
    st.write("### Chat Interface 💬")

    # Display chat history
    for message in st.session_state.messages:
        role, text = message["role"], message["text"]
        with st.chat_message(role):
            st.write(text)
    # Chat input
    user_query = st.chat_input("Ask a question...")

    if user_query:
        # Show the user's message
        st.session_state.messages.append({"role": "user", "text": user_query})
        with st.chat_message("user"):
            st.write(user_query)

        # Build the retrieval-augmented chain over the scraped documents
        retriever = st.session_state.vector_store.as_retriever(search_kwargs={"k": 3})
        doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
        rag_chain = create_retrieval_chain(retriever, doc_chain)
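        # create_retrieval_chain returns a dict holding the original input keys
        # plus "context" (the retrieved documents) and "answer".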
        # Rebuild the memory string from the conversation so far
        history_text = "\n".join(
            f"User: {msg['text']}" if msg["role"] == "user" else f"AI: {msg['text']}"
            for msg in st.session_state.messages
        )
        st.session_state.history = history_text

        # Invoke the chain; it performs the retrieval itself, so a separate
        # retriever.invoke() call would just duplicate work.
        response = rag_chain.invoke({"input": user_query, "history": st.session_state.history})
        answer = response["answer"]
        retrieved_docs = response.get("context", [])
        source_url = retrieved_docs[0].metadata.get("source", "Unknown") if retrieved_docs else "No source found"
        # Format, store, and display the assistant's response
        formatted_response = f"**Answer:** {answer}\n\n**Source:** {source_url}"
        st.session_state.messages.append({"role": "assistant", "text": formatted_response})
        with st.chat_message("assistant"):
            st.write(formatted_response)