# Streamlit app: chat over scraped web data (Hugging Face Spaces deployment).
import os
import subprocess
import asyncio

import streamlit as st
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

from scraper.scraper import process_urls
from embedding.vector_store import initialize_vector_store

# Install Playwright browser binaries needed by the scraper.
# NOTE(review): this runs on EVERY Streamlit rerun; consider guarding it
# (e.g. behind st.session_state or a cached function) to avoid repeated work.
subprocess.run(["playwright", "install"], check=True)

# Groq API key: read from the environment — never commit secrets to source.
# SECURITY: the key previously hard-coded here must be treated as leaked and revoked.
groq_api = os.getenv("GROQ_API_KEY")

# Initialize the LLM; temperature=0 for deterministic, factual answers.
llm = ChatGroq(model="llama-3.2-1b-preview", groq_api_key=groq_api, temperature=0)
# System prompt: factual answers must come only from the retrieved context.
# The {history} and {context} placeholders are filled at invocation time.
system_prompt = """
You are an AI assistant capable of answering user queries.
- **For factual questions**, use **only** the retrieved context below. If the answer is not in the context, say **"I don't know."**
- **For general conversational inputs** (e.g., greetings, small talk), respond naturally.
- **Maintain memory of previous questions and responses to ensure context-aware answers.**
- **Do not make assumptions or generate false information.**
**Chat History:**
{history}
**Context:**
{context}
Now, answer concisely.
"""

# Chat prompt: the explicit from_messages constructor is the idiomatic way
# to build a multi-role prompt in LangChain.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# Ensure proper asyncio handling on Windows (Playwright needs the proactor loop).
import sys
if sys.platform.startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())


def run_asyncio_coroutine(coro):
    """Run *coro* to completion on a fresh event loop and return its result.

    Streamlit's script thread has no running event loop, so a new one is
    created per call. The loop is closed in a ``finally`` block — the
    original leaked one OS-level loop per invocation.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coro)
    finally:
        asyncio.set_event_loop(None)
        loop.close()
# --- Streamlit UI ---
# NOTE(review): title emoji restored from mojibake ("π€" -> robot) — confirm glyph.
st.title("Chat with Scraped Data 🤖")

# URL inputs: one URL per line; the scrape button stays disabled until non-empty.
urls = st.text_area("Enter URLs (one per line)")
run_scraper = st.button("Run Scraper", disabled=not urls.strip())

# Session-state defaults, applied once per browser session.
_SESSION_DEFAULTS = {
    "messages": [],          # chat transcript: dicts with "role" and "text"
    "history": "",           # flattened Q&A text injected into the prompt
    "scraping_done": False,  # gates the chat UI until a scrape succeeds
    "vector_store": None,    # store built by initialize_vector_store()
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# Run scraper: fetch the URLs, split into documents, and build the vector store.
if run_scraper:
    st.write("Fetching and processing URLs... This may take a while.")
    # Strip whitespace and drop blank lines so stray newlines in the text
    # area never reach the scraper as empty URLs.
    url_list = [u.strip() for u in urls.split("\n") if u.strip()]
    split_docs = run_asyncio_coroutine(process_urls(url_list))
    st.session_state.vector_store = initialize_vector_store(split_docs)
    st.session_state.scraping_done = True
    st.success("Scraping and processing completed!")
# Chat is enabled only after a successful scrape populates the vector store.
if not st.session_state.scraping_done:
    st.warning("Scrape some data first to enable chat!")
else:
    # NOTE(review): emoji restored from mojibake ("π¬" -> speech balloon) — confirm glyph.
    st.write("### Chat Interface 💬")

    # Replay the stored transcript so the conversation survives Streamlit reruns.
    for message in st.session_state.messages:
        role, text = message["role"], message["text"]
        with st.chat_message(role):
            st.write(text)

    user_query = st.chat_input("Ask a question...")
    if user_query:
        # Record and display the user's message.
        st.session_state.messages.append({"role": "user", "text": user_query})
        with st.chat_message("user"):
            st.write(user_query)

        # Build the RAG chain: top-3 retrieval feeding a stuff-documents chain.
        retriever = st.session_state.vector_store.as_retriever(search_kwargs={'k': 3})
        scraper_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
        llm_chain = create_retrieval_chain(retriever, scraper_chain)

        # Flatten the transcript (including the new question) into prompt history.
        history_text = "\n".join(
            f"User: {msg['text']}" if msg["role"] == "user" else f"AI: {msg['text']}"
            for msg in st.session_state.messages
        )
        st.session_state.history = history_text

        # Single chain invocation. create_retrieval_chain returns both the
        # answer and the documents it retrieved (under "context"), so we no
        # longer query the retriever a second time — the cited source now
        # always matches the context the LLM actually saw.
        response = llm_chain.invoke({"input": user_query, "history": st.session_state.history})
        answer = response["answer"]
        retrieved_docs = response.get("context", [])
        source_url = retrieved_docs[0].metadata.get("source", "Unknown") if retrieved_docs else "No source found"

        # Store and display the assistant's reply with source attribution.
        formatted_response = f"**Answer:** {answer}\n\n**Source:** {source_url}"
        st.session_state.messages.append({"role": "assistant", "text": formatted_response})
        with st.chat_message("assistant"):
            st.write(formatted_response)