import os
import streamlit as st
from crewai import Agent, Task, Crew
from crewai_tools import LlamaIndexTool
from langchain_groq import ChatGroq
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import tempfile
import requests
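# NOTE: the imports above roughly correspond to these PyPI packages (not pinned
# or verified against a requirements.txt): streamlit, crewai, crewai-tools,
# langchain-groq, llama-index, llama-index-embeddings-huggingface, requests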
# --- Streamlit UI Header ---
st.title("Document Q&A Assistant with CrewAI")
st.write("Upload a document, provide a link, or ask questions dynamically!")
# --- Key Configuration from Secrets ---
try:
    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    # TAVILY_API_KEY is loaded here but not used elsewhere in this script.
    TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"]
except KeyError as e:
    st.error(f"Missing API key in secrets: {e}. Please add it to your Streamlit secrets.")
    st.stop()
# Check that the retrieved API keys are non-empty
if not GROQ_API_KEY or not TAVILY_API_KEY:
    st.error("One or more required API keys are missing. Please check your configuration.")
    st.stop()
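# Example .streamlit/secrets.toml (values are placeholders; on Hugging Face
# Spaces these can instead be set as Space secrets):
# GROQ_API_KEY = "your-groq-api-key"
# TAVILY_API_KEY = "your-tavily-api-key"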
# Function to download PDF from URL
def download_pdf_from_url(url, save_path):
    # Fetch the PDF and write it to save_path; return the path on success, None otherwise.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        with open(save_path, 'wb') as f:
            f.write(response.content)
        return save_path
    else:
        st.error("Failed to download PDF from the provided URL.")
        return None
# --- User Inputs for File or Link ---
document_source = st.radio("Choose input method:", ("Upload a PDF", "Provide PDF URL"))
pdf_path = None
if document_source == "Upload a PDF":
    uploaded_file = st.file_uploader("Upload a PDF file", type=['pdf'])
    if uploaded_file:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.getvalue())
            pdf_path = temp_file.name
        st.success("File uploaded successfully!")
else:
    pdf_url = st.text_input("Enter PDF URL")
    if st.button("Download PDF") and pdf_url:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            saved_path = download_pdf_from_url(pdf_url, temp_file.name)
        if saved_path:
            pdf_path = saved_path
# --- LLM Configuration ---
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model="groq/llama-3.3-70b-versatile")
# Function to create Query Engine
def create_query_engine(pdf_path, llm):
    # Note: `llm` is not used in this function; the query engine relies on the
    # embedding model for retrieval and LlamaIndex's default LLM settings for synthesis.
    reader = SimpleDirectoryReader(input_files=[pdf_path])
    docs = reader.load_data()
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    return index.as_query_engine(similarity_top_k=5)
# --- Streamlit Question Workflow ---
if pdf_path:
    st.success("PDF loaded successfully!")
    query_engine = create_query_engine(pdf_path, llm)
    query_tool = LlamaIndexTool.from_query_engine(
        query_engine,
        name="Document Query Tool",
        description="Tool to analyze and retrieve information from the uploaded document."
    )
    # Define Agents and Tasks
    researcher = Agent(
        role="Document Analyst",
        goal="Analyze documents and answer questions",
        backstory="Expert at retrieving insights from documents.",
        verbose=True,
        allow_delegation=False,
        tools=[query_tool],
        llm=llm,
    )
    task = Task(
        # The {question} placeholder is filled in by crew.kickoff(inputs=...)
        description="Answer the user's question based on the uploaded document: {question}",
        expected_output="Clear and concise answers to user questions.",
        agent=researcher,
    )
    crew = Crew(agents=[researcher], tasks=[task], verbose=True)
    st.subheader("Ask a Question")
    user_question = st.text_input("Enter your question")
    if st.button("Get Answer"):
        with st.spinner("Processing your request..."):
            result = crew.kickoff(inputs={"question": user_question})
        st.success("Here is the answer:")
        st.write(result)
else:
    st.warning("Please upload a PDF or provide a valid URL to continue.")
# --- Clean Up ---
if pdf_path and os.path.exists(pdf_path):
    os.remove(pdf_path)