"""Streamlit app: document Q&A over an uploaded or URL-downloaded PDF.

A LlamaIndex vector-store query engine is wrapped as a CrewAI tool and
driven by a single "Document Analyst" agent backed by a Groq-hosted LLM.
"""

import os
import tempfile

import requests
import streamlit as st
from crewai import Agent, Task, Crew
from crewai_tools import LlamaIndexTool
from langchain_groq import ChatGroq
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# --- Streamlit UI Header ---
st.title("Document Q&A Assistant with CrewAI")
st.write("Upload a document, provide a link, or ask questions dynamically!")

# --- Key Configuration from Secrets ---
# Fail fast with a clear message if either key is absent or empty.
try:
    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"]
except KeyError as e:
    st.error(f"Missing API key in secrets: {e}. Please add it to your environment.")
    st.stop()

if not GROQ_API_KEY or not TAVILY_API_KEY:
    st.error("One or more required API keys are missing. Please check your configuration.")
    st.stop()


def download_pdf_from_url(url, save_path):
    """Download a PDF from *url* into *save_path*.

    Returns *save_path* on success, or None (after surfacing a Streamlit
    error) on any network failure or non-200 response.
    """
    try:
        # Timeout so the app cannot hang forever on an unresponsive host;
        # RequestException covers DNS failures, refused connections, etc.,
        # which previously crashed the whole script run.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        st.error("Failed to download PDF from the provided URL.")
        return None
    if response.status_code == 200:
        with open(save_path, "wb") as f:
            f.write(response.content)
        return save_path
    st.error("Failed to download PDF from the provided URL.")
    return None


def create_query_engine(pdf_path, llm):
    """Build a vector-store query engine over the PDF at *pdf_path*.

    *llm* is accepted for interface compatibility with existing callers
    but is not used here: retrieval relies on a local HuggingFace
    embedding model, and answer generation happens in the CrewAI agent.
    Returns a LlamaIndex query engine retrieving the top 5 chunks.
    """
    reader = SimpleDirectoryReader(input_files=[pdf_path])
    docs = reader.load_data()
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    return index.as_query_engine(similarity_top_k=5)


# --- User Inputs for File or Link ---
# The PDF path is kept in session_state so it survives Streamlit's
# top-to-bottom script reruns. Previously it was a plain local variable,
# so for the URL branch it was only set on the exact rerun where
# "Download PDF" was clicked — by the time the user clicked "Get Answer"
# it was None again and the Q&A section could never appear.
if "pdf_path" not in st.session_state:
    st.session_state.pdf_path = None

document_source = st.radio("Choose input method:", ("Upload a PDF", "Provide PDF URL"))

if document_source == "Upload a PDF":
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.getvalue())
            st.session_state.pdf_path = temp_file.name
        st.success("File uploaded successfully!")
else:
    pdf_url = st.text_input("Enter PDF URL")
    if st.button("Download PDF") and pdf_url:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            saved_path = download_pdf_from_url(pdf_url, temp_file.name)
        if saved_path:
            st.session_state.pdf_path = saved_path

pdf_path = st.session_state.pdf_path

# --- LLM Configuration ---
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model="groq/llama-3.3-70b-versatile")

# --- Streamlit Question Workflow ---
# Guard on os.path.exists as well: a stale session_state entry could
# point at a temp file the OS has already reclaimed.
if pdf_path and os.path.exists(pdf_path):
    st.success("PDF loaded successfully!")
    query_engine = create_query_engine(pdf_path, llm)
    query_tool = LlamaIndexTool.from_query_engine(
        query_engine,
        name="Document Query Tool",
        description="Tool to analyze and retrieve information from the uploaded document.",
    )

    # Define Agents and Tasks
    researcher = Agent(
        role="Document Analyst",
        goal="Analyze documents and answer questions",
        backstory="Expert at retrieving insights from documents.",
        verbose=True,
        allow_delegation=False,
        tools=[query_tool],
        llm=llm,
    )
    task = Task(
        description="Answer user queries based on the uploaded document.",
        expected_output="Clear and concise answers to user questions.",
        agent=researcher,
    )
    crew = Crew(agents=[researcher], tasks=[task], verbose=True)

    st.subheader("Ask a Question")
    user_question = st.text_input("Enter your question")
    if st.button("Get Answer"):
        with st.spinner("Processing your request..."):
            result = crew.kickoff(inputs={"question": user_question})
        st.success("Here is the answer:")
        st.write(result)
else:
    st.warning("Please upload a PDF or provide a valid URL to continue.")

# NOTE: the previous end-of-script os.remove(pdf_path) is intentionally
# gone — it deleted the temp PDF after every rerun, so the document was
# destroyed before any later rerun (e.g. the "Get Answer" click) could
# query it. The delete=False temp files are left for the OS temp-dir
# cleanup; a "Clear document" button would be the place to remove them
# explicitly.